{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:06.524220500Z",
     "start_time": "2024-09-24T14:04:05.735492400Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Display configuration for pandas and matplotlib.\n",
    "# Show up to 30 columns when a DataFrame is rendered.\n",
    "pd.options.display.max_columns = 30\n",
    "# SimHei is a Chinese (CJK) font, presumably chosen so Chinese plot text renders;\n",
    "# it has to be installed on the machine running the notebook.\n",
    "plt.rcParams[\"font.family\"] = \"SimHei\"\n",
    "plt.rcParams[\"font.size\"] = 14\n",
    "# Applied after the font settings, in the same order as before.\n",
    "plt.style.use(\"tableau-colorblind10\")"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 数据清洗"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "266b5ad2761ace7"
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Load the three small tables.\n",
    "# The untyped read of the large user log (next line) is only needed on the very\n",
    "# first run; later runs load it with explicit dtypes in the following cell.\n",
    "# data_user_log = pd.read_csv(\"user_log_format1.csv\")\n",
    "data_user_info, data_train, data_test = (\n",
    "    pd.read_csv(csv_name)\n",
    "    for csv_name in (\"user_info_format1.csv\", \"train_format1.csv\", \"test_format1.csv\")\n",
    ")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:06.665224300Z",
     "start_time": "2024-09-24T14:04:06.520220700Z"
    }
   },
   "id": "55d59971182fb0af",
   "execution_count": 2
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Later runs: pass explicit dtypes to read_csv to reduce the memory footprint.\n",
    "d_types = {\n",
    "    'user_id': 'int32',\n",
    "    'item_id': 'int32',\n",
    "    'cat_id': 'int16',\n",
    "    'seller_id': 'int16',\n",
    "    # brand_id has NaNs (see the null check further down), which a plain\n",
    "    # integer dtype cannot hold, so it is read as float.\n",
    "    'brand_id': 'float32',\n",
    "    'time_stamp': 'int16',\n",
    "    'action_type': 'int8',\n",
    "}\n",
    "data_user_log = pd.read_csv(\"user_log_format1.csv\", dtype=d_types)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:26.307937100Z",
     "start_time": "2024-09-24T14:04:06.666225Z"
    }
   },
   "id": "8f78013185e623cc",
   "execution_count": 3
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   user_id  item_id  cat_id  seller_id  brand_id  time_stamp  action_type\n0   328862   323294     833       2882    2661.0         829            0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>item_id</th>\n      <th>cat_id</th>\n      <th>seller_id</th>\n      <th>brand_id</th>\n      <th>time_stamp</th>\n      <th>action_type</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>328862</td>\n      <td>323294</td>\n      <td>833</td>\n      <td>2882</td>\n      <td>2661.0</td>\n      <td>829</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": "   user_id  age_range  gender\n0   376517        6.0     1.0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>age_range</th>\n      <th>gender</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>376517</td>\n      <td>6.0</td>\n      <td>1.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": "   user_id  merchant_id  label\n0    34176         3906      0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>label</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>34176</td>\n      <td>3906</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": "   user_id  merchant_id  prob\n0   163968         4605   NaN",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>user_id</th>\n      <th>merchant_id</th>\n      <th>prob</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>163968</td>\n      <td>4605</td>\n      <td>NaN</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Peek at the first row of every table; display() renders each argument\n",
    "# as its own rich output.\n",
    "display(\n",
    "    data_user_log.head(1),\n",
    "    data_user_info.head(1),\n",
    "    data_train.head(1),\n",
    "    data_test.head(1),\n",
    ")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:26.326093Z",
     "start_time": "2024-09-24T14:04:26.308936400Z"
    }
   },
   "id": "1f309cf2c5805af9",
   "execution_count": 4
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 查看数据类型"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "71f31ff14229589"
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 54925330 entries, 0 to 54925329\n",
      "Data columns (total 7 columns):\n",
      " #   Column       Dtype  \n",
      "---  ------       -----  \n",
      " 0   user_id      int32  \n",
      " 1   item_id      int32  \n",
      " 2   cat_id       int16  \n",
      " 3   seller_id    int16  \n",
      " 4   brand_id     float32\n",
      " 5   time_stamp   int16  \n",
      " 6   action_type  int8   \n",
      "dtypes: float32(1), int16(3), int32(2), int8(1)\n",
      "memory usage: 995.2 MB\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 424170 entries, 0 to 424169\n",
      "Data columns (total 3 columns):\n",
      " #   Column     Non-Null Count   Dtype  \n",
      "---  ------     --------------   -----  \n",
      " 0   user_id    424170 non-null  int64  \n",
      " 1   age_range  421953 non-null  float64\n",
      " 2   gender     417734 non-null  float64\n",
      "dtypes: float64(2), int64(1)\n",
      "memory usage: 9.7 MB\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 260864 entries, 0 to 260863\n",
      "Data columns (total 3 columns):\n",
      " #   Column       Non-Null Count   Dtype\n",
      "---  ------       --------------   -----\n",
      " 0   user_id      260864 non-null  int64\n",
      " 1   merchant_id  260864 non-null  int64\n",
      " 2   label        260864 non-null  int64\n",
      "dtypes: int64(3)\n",
      "memory usage: 6.0 MB\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 261477 entries, 0 to 261476\n",
      "Data columns (total 3 columns):\n",
      " #   Column       Non-Null Count   Dtype  \n",
      "---  ------       --------------   -----  \n",
      " 0   user_id      261477 non-null  int64  \n",
      " 1   merchant_id  261477 non-null  int64  \n",
      " 2   prob         0 non-null       float64\n",
      "dtypes: float64(1), int64(2)\n",
      "memory usage: 6.0 MB\n"
     ]
    }
   ],
   "source": [
    "# Print the schema (dtypes, non-null counts, memory) of every table.\n",
    "# DataFrame.info() writes its report to stdout and returns None, so it is\n",
    "# called directly; wrapping it in display() only adds a stray \"None\" output.\n",
    "data_user_log.info()\n",
    "data_user_info.info()\n",
    "data_train.info()\n",
    "data_test.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:26.462423Z",
     "start_time": "2024-09-24T14:04:26.327092500Z"
    }
   },
   "id": "12350917c7ca2ef4",
   "execution_count": 5
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 压缩数据"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "be797ed9749eb422"
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Index: 522341 entries, 0 to 261476\n",
      "Data columns (total 4 columns):\n",
      " #   Column       Non-Null Count   Dtype  \n",
      "---  ------       --------------   -----  \n",
      " 0   user_id      522341 non-null  int64  \n",
      " 1   merchant_id  522341 non-null  int64  \n",
      " 2   label        260864 non-null  float64\n",
      " 3   origin       522341 non-null  object \n",
      "dtypes: float64(1), int64(2), object(1)\n",
      "memory usage: 19.9+ MB\n"
     ]
    }
   ],
   "source": [
    "# Tag each row with the split it came from, then stack train and test into a\n",
    "# single frame so the next steps can extract features for both at once.\n",
    "# NOTE(review): the original row indexes are kept (no ignore_index), so index\n",
    "# labels repeat across the two splits.\n",
    "data_train[\"origin\"] = \"train\"\n",
    "data_test[\"origin\"] = \"test\"\n",
    "# \"prob\" is the all-null placeholder column from the test file; drop it.\n",
    "data = pd.concat([data_train, data_test], sort=False).drop(columns=[\"prob\"])\n",
    "data.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:26.644858600Z",
     "start_time": "2024-09-24T14:04:26.349723500Z"
    }
   },
   "id": "6cb134c2f280f026",
   "execution_count": 6
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Downcast the numeric columns of each table to the smallest dtype that fits.\n",
    "# Non-numeric columns are not selected and stay untouched.\n",
    "#\n",
    "# First run: compress every table, including the raw user log:\n",
    "# frames_to_downcast = [data, data_user_log, data_user_info]\n",
    "\n",
    "# Later runs: data_user_log is already loaded with compact dtypes, so skip it.\n",
    "# The variable is deliberately not called `list`, which would shadow the\n",
    "# builtin for every cell executed afterwards.\n",
    "frames_to_downcast = [data, data_user_info]\n",
    "\n",
    "for df in frames_to_downcast:\n",
    "    fcols = df.select_dtypes('float').columns\n",
    "    icols = df.select_dtypes('integer').columns\n",
    "    # The frames are modified in place through column assignment.\n",
    "    df[fcols] = df[fcols].apply(pd.to_numeric, downcast='float')\n",
    "    df[icols] = df[icols].apply(pd.to_numeric, downcast='integer')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:26.732858500Z",
     "start_time": "2024-09-24T14:04:26.388424300Z"
    }
   },
   "id": "f7efb6359227b8f8",
   "execution_count": 7
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 54925330 entries, 0 to 54925329\n",
      "Data columns (total 7 columns):\n",
      " #   Column       Dtype  \n",
      "---  ------       -----  \n",
      " 0   user_id      int32  \n",
      " 1   item_id      int32  \n",
      " 2   cat_id       int16  \n",
      " 3   seller_id    int16  \n",
      " 4   brand_id     float32\n",
      " 5   time_stamp   int16  \n",
      " 6   action_type  int8   \n",
      "dtypes: float32(1), int16(3), int32(2), int8(1)\n",
      "memory usage: 995.2 MB\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 424170 entries, 0 to 424169\n",
      "Data columns (total 3 columns):\n",
      " #   Column     Non-Null Count   Dtype  \n",
      "---  ------     --------------   -----  \n",
      " 0   user_id    424170 non-null  int32  \n",
      " 1   age_range  421953 non-null  float32\n",
      " 2   gender     417734 non-null  float32\n",
      "dtypes: float32(2), int32(1)\n",
      "memory usage: 4.9 MB\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Index: 522341 entries, 0 to 261476\n",
      "Data columns (total 4 columns):\n",
      " #   Column       Non-Null Count   Dtype  \n",
      "---  ------       --------------   -----  \n",
      " 0   user_id      522341 non-null  int32  \n",
      " 1   merchant_id  522341 non-null  int16  \n",
      " 2   label        260864 non-null  float32\n",
      " 3   origin       522341 non-null  object \n",
      "dtypes: float32(1), int16(1), int32(1), object(1)\n",
      "memory usage: 13.0+ MB\n"
     ]
    }
   ],
   "source": [
    "# Re-check dtypes and memory usage after downcasting.\n",
    "# DataFrame.info() writes its report to stdout and returns None, so it is\n",
    "# called directly; wrapping it in display() only adds a stray \"None\" output.\n",
    "data_user_log.info()\n",
    "data_user_info.info()\n",
    "data.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:26.735859300Z",
     "start_time": "2024-09-24T14:04:26.496123Z"
    }
   },
   "id": "b9f85463e1052a10",
   "execution_count": 8
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 记录数据类型，二次导入时用\n",
    "# d_col = data_user_log.dtypes.index\n",
    "# d_type = [i.name for i in data_user_log.dtypes.values]\n",
    "# column_dict = dict(zip(d_col,d_type))\n",
    "# print(column_dict)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:26.737858800Z",
     "start_time": "2024-09-24T14:04:26.526755300Z"
    }
   },
   "id": "4377250fcfcb19eb",
   "execution_count": 9
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Use one name for the merchant key: the train/test tables call it merchant_id,\n",
    "# the user log calls it seller_id. Renamed in place on the user log.\n",
    "data_user_log.rename({\"seller_id\": \"merchant_id\"}, axis=\"columns\", inplace=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:26.750859100Z",
     "start_time": "2024-09-24T14:04:26.529264600Z"
    }
   },
   "id": "5dc6b854e37696c8",
   "execution_count": 10
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 空值处理"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "b4496596c4630b02"
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 424170 entries, 0 to 424169\n",
      "Data columns (total 3 columns):\n",
      " #   Column     Non-Null Count   Dtype  \n",
      "---  ------     --------------   -----  \n",
      " 0   user_id    424170 non-null  int32  \n",
      " 1   age_range  424170 non-null  float32\n",
      " 2   gender     424170 non-null  float32\n",
      "dtypes: float32(2), int32(1)\n",
      "memory usage: 4.9 MB\n"
     ]
    }
   ],
   "source": [
    "# age_range and gender contain nulls; map them onto the existing \"unknown\" codes:\n",
    "#   age_range: 0 and null both mean unknown -> fill with 0\n",
    "#   gender:    2 and null both mean unknown -> fill with 2\n",
    "#              (filling with 0 would relabel unknown users as a real gender)\n",
    "# A single dict-based DataFrame.fillna replaces the chained\n",
    "# `df[col].fillna(value, inplace=True)` calls, which emit a FutureWarning and\n",
    "# no longer modify the frame under pandas 3.0.\n",
    "data_user_info = data_user_info.fillna({\"age_range\": 0, \"gender\": 2})\n",
    "\n",
    "data_user_info.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:26.752858300Z",
     "start_time": "2024-09-24T14:04:26.535297300Z"
    }
   },
   "id": "d6ed1b57dfea6941",
   "execution_count": 11
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "user_id            0\nitem_id            0\ncat_id             0\nmerchant_id        0\nbrand_id       91015\ntime_stamp         0\naction_type        0\ndtype: int64"
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the missing values in each column of the user log.\n",
    "data_user_log.isnull().sum()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:26.904367100Z",
     "start_time": "2024-09-24T14:04:26.544857100Z"
    }
   },
   "id": "b61156d69c21304e",
   "execution_count": 12
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# brand_id is the only user-log column with nulls; fill them with 0.\n",
    "# The result is assigned back to the column instead of calling\n",
    "# fillna(inplace=True) on the selected column: that chained in-place form\n",
    "# emits a FutureWarning and no longer modifies the frame under pandas 3.0.\n",
    "data_user_log[\"brand_id\"] = data_user_log[\"brand_id\"].fillna(0)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:26.945473Z",
     "start_time": "2024-09-24T14:04:26.877366200Z"
    }
   },
   "id": "98b602eac298bc4c",
   "execution_count": 13
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 数据初步探索"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "dfa814fae7b4a6bd"
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "<Axes: xlabel='age_range'>"
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": "<Figure size 640x480 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkgAAAHBCAYAAACSSUQiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA/zUlEQVR4nO3de1xVZd7///cCFDYoOykCYTQFFcmSUeyoUjh5wMzSunNSSzuYh9BMy0NldjKHytT0HpsZsxp1TKSTxTSWmT70zkLHMU+Tk5nKpCUQCiogsK/fH/5YX/famIc4CL6ej8f6Y6/12df6XBuJd2uvg2WMMQIAAIDNr7YbAAAAON8QkAAAABwISAAAAA4EJAAAAAcCEgAAgAMBCQAAwIGABAAA4EBAAgAAcAio7QbqIo/Ho/3796tx48ayLKu22wEAAGfAGKPCwkJFRUXJz++XjxERkM7B/v371axZs9puAwAAnIPs7Gz95je/+cUaAtI5aNy4saQTH3BoaGgtdwMAAM5EQUGBmjVrZv8d/yUEpHNQ8bVaaGgoAQkAgDrmTE6P4SRtAAAABwISAACAAwEJAADAgYAEAADgQEACAABwICABAAA4EJAAAAAcCEgAAAAOBCQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHAgIAEAADgQkAAAABwCarsBALXLSl1co/szcwfV6P4A4FxwBAkAAMCBgAQAAOBAQAIAAHAgIAEAADgQkAAAABwISAAAAA4EJAAAAAcCEgAAgAMBCQAAwIGABAAA4EBAAgAAcCAgAQAAOBCQAAAAHAhIAAAADgQkAAAABwISAACAAwEJAADAgYAEAADgQEACAABwICABAAA4EJAAAAAcCEgAAAAOBCQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHAgIAEAADgQkAAAABx+VUB66KGHdOONN1a67YMPPlDHjh0VHBysK6+8UitXrvSp8Xg8SktLU2xsrFwul3r27Kndu3f71OXl5WnYsGGKjIyU2+3W8OHDVVRU5FO3bds2paSkyO12Kzo6WrNmzTrn3gAAwIXrnAPSiy++qD/+8Y+Vblu6dKn69eunsrIyTZ8+XXFxcerdu7e2bNniVZeamqpJkybp8ssvV1pamnJzc9W9e3cdPXrUrikuLlaPHj20YMEC3XnnnXryySf1/vvv69577/Uaa+fOnUpKSlJWVpYmTpyoQYMGafz48XrttdfOqTcAAHDhsowx5mzeUFpaqtGjR+vNN99U48aN1a5dO61evdrefvToUbVs2VKNGjXS5s2bFRoaKmOMkpOT5efnp1WrVkmSNmzYoKuvvlp33HGH0tPTZVmWfv75Z8XGxuqRRx7RU089JUl66aWXNGHCBM2ZM0epqamSpM8//1zdunXTmjVrlJSUJEm6+eab9cknnygrK0sdOnSQJE2dOlUzZ87UgQMHFBIScsa9nU5BQYHcbrcOHz6s0NDQs/n4gPOOlbq4Rvdn5g6q0f0BQIWz+ft91keQvvjiC2VmZmrlypVq166dz/bVq1crJydHqamp9s4ty9KIESO0Zs0a5ebmSpLS09MlSZMnT5ZlWZKksLAwDRgwQBkZGfZ46enpCgsL0/Dhw+11ycnJiouLs+sKCgq0YsUKpaSk2OFIkkaOHKnCwkKtWLHirHoDAAAXtrMOSG3bttW2bdvUpUuXSrdv3bpV0okQc7LExER5PB5t3rzZrnO73V6BpqJu+/btOn78uF3XpUsXNWjQwKdu06ZNkqQdO3aovLzcZ5+RkZGKioqy6860NwAAcGELONs3RERE/OL2/Px8SVJMTEyl79uzZ49d17JlS/vo0cl1Ho9H+/btU3R0tEpKSnzGqqhbs2bNL+6zou7kfZ5Jb04lJSUqKSmxXxcUFFRaBwAA6ocqv8zf4/FIkho1auS1Pjg4WJJ06NAhu85Z46w71VgVdSeP9WvqnL05TZ8+XW63216a
NWtWaR0AAKgfqjwgBQcHy7IsOc/9rnhdXFxs11Xm5DqXy+VzhOnkutONVVndmfTmNHnyZB0+fNhesrOzT7k/AABQ91V5QIqOjpYxxidE5OTkSJJ9cnR0dLT27dvn8/6T6/z8/BQZGXnKupPHknRGdWfSm1NgYKBCQ0O9FgAAUH9VeUCqOOl67dq1XuuzsrIkSU2bNrXrsrOztXfv3tPWrVu3zmc/WVlZdk1sbKxCQ0N99pmfn69du3Z5jXUmvQEAgAtblQekxMRENW/eXHPnzlVZWZm9fv78+fL397evIOvXr58kafbs2XbNsWPHtGTJEiUkJCg8PFyS1L9/f+3evVvLly+36zZu3Kivv/5a3bt3lyT5+/urb9++ysjI0A8//GDXvf766zLG2HVn2hsAALiwVXlA8vPz09SpU7Vhwwbddddd+uyzzzRq1ChlZmZq8ODBuuSSSyRJrVq10uDBgzVz5kw988wz+vTTT5WSkqKDBw9q7Nix9ngDBw5UmzZtNGTIEL311lt655131L9/fwUFBXndG2nSpEkqLS1VSkqKMjMzNXv2bE2ZMkVxcXHq1avXWfUGAAAubGd9mf+ZuO+++5SXl6cpU6bYN3Ps06ePXn31Va+6efPmyRijp59+WtKJI0GPP/64hg4date4XC5lZmZq4MCB9vomTZpo0aJFio+Pt+vatWtnP4KkT58+kqT4+HgtW7ZMQUFBZ90bAAC4cJ31o0bOxk8//aSsrCxFRUUpMTHxlHXffPONdu7cqYSEBLVo0aLSGmOM1q9fr/z8fHXu3FkXXXRRpXVFRUVau3atAgIC1LVrV58bTJ5tb5XhUSOoT3jUCIALxdn8/a7WgFRfEZBQnxCQAFwoqvVZbAAAAPUdAQkAAMCBgAQAAOBAQAIAAHAgIAEAADgQkAAAABwISAAAAA7VcidtnBr3nAEA4PzHESQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHAgIAEAADgQkAAAABwISAAAAA4EJAAAAAcCEgAAgAMBCQAAwIGABAAA4EBAAgAAcCAgAQAAOBCQAAAAHAhIAAAADgQkAAAABwISAACAAwEJAADAgYAEAADgQEACAABwICABAAA4EJAAAAAcCEgAAAAOBCQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHAgIAEAADgQkAAAABwISAAAAA4EJAAAAAcCEgAAgAMBCQAAwIGABAAA4EBAAgAAcCAgAQAAOBCQAAAAHAhIAAAADgQkAAAAh2oJSKWlpZo8ebJiYmIUFBSkqKgoDRkyRPv37/eq++CDD9SxY0cFBwfryiuv1MqVK33G8ng8SktLU2xsrFwul3r27Kndu3f71OXl5WnYsGGKjIyU2+3W8OHDVVRU5FO3bds2paSkyO12Kzo6WrNmzaqyeQMAgPohoDoGHTNmjN566y09/PDDatWqlf7zn/9o3rx5Wr9+vbZt26aGDRtq6dKluuuuu3TFFVdo+vTpWrt2rXr37q2NGzeqffv29lipqamaN2+e+vTpo4cfflhvvfWWunfvri1btigkJESSVFxcrB49emjz5s166KGH1KxZM7388ss6fPiw3n77bXusnTt3KikpSZZlaeLEiTp06JDGjx+voKAgjRgxojo+CgAAUAdVeUA6evSo5s+fr4kTJ+r555+318fFxen+++/XunXrdM0112j06NFq0aKF1q1bp9DQUI0ZM0bJyckaO3asVq1aJUnasGGD5s2bpzvuuEPp6emyLEuDBw9WbGysZsyYoaeeekqSNGfOHG3atElz5sxRamqqJKlTp07q1q2bRo0apaSkJEnSuHHjVFhYqKysLHXo0EGS5HK5NGHCBN1999124AIAABe2Kv+K7ciRIyorK1OjRo281gcGBkqSAgICtHr1auXk5Cg1NVWhoaGSJMuyNGLECK1Zs0a5ubmSpPT0dEnS5MmTZVmWJCksLEwDBgxQRkaGPXZ6errCwsI0fPhwe11y
crLi4uLsuoKCAq1YsUIpKSl2OJKkkSNHqrCwUCtWrKjqjwIAANRRVR6QIiIi1LFjR82aNUtr165VUVGRNmzYoKefflqXXXaZrr76am3dulXSiRBzssTERHk8Hm3evFmStHXrVrndbq9AU1G3fft2HT9+3K7r0qWLGjRo4FO3adMmSdKOHTtUXl7us8/IyEhFRUXZdZUpKSlRQUGB1wIAAOqvajkHKTMzUz179rS/2pKk3/72t3r33XcVFBSk/Px8SVJMTIzX+yIiIiRJe/bskSTl5+erZcuW9tGjk+s8Ho/27dun6OholZSU+IxVUbdmzRp7rMr2WVFXsc/KTJ8+Xc8888xpZg3gfGOlLq7R/Zm5g2p0fwCqT7VcxfaXv/xFW7Zs0bXXXqv7779fnTp10tdff63nnntO5eXl8ng8kuTzNVxwcLAk6dChQ5JOXMHmrHHWnWqsirqTxzqTuspMnjxZhw8ftpfs7OxfmD0AAKjrqvwI0o4dOzR16lQ988wz9knUkjRt2jQ9+eST6tq1q4KDg2VZlowxXu+teF1cXCzpRHCpCDanqnO5XD5HmE6uO3msUzm5rjKBgYH2OVQAAKD+q/IjSJ988omMMRo7dqzX+gkTJqhhw4Zas2aNoqOjZYzxORKTk5MjSfaJ29HR0dq3b5/PPk6u8/PzU2Rk5CnrTh5L0mnrAAAAqjwgVRzdqTiBukJxcbE8Ho/Kysrsk67Xrl3rVZOVlSVJatq0qSSpQ4cOys7O1t69e09bt27dOp9esrKy7JrY2FiFhob67DM/P1+7du2y6wAAAKo8IFWcBP3mm2/a64wxmjZtmsrKypSYmKjExEQ1b95cc+fOVVlZmV03f/58+fv721ea9evXT5I0e/Zsu+bYsWNasmSJEhISFB4eLknq37+/du/ereXLl9t1Gzdu1Ndff63u3btLkvz9/dW3b19lZGTohx9+sOtef/11GWPsOgAAgCoPSD169FDr1q312GOPqX379urVq5diYmKUlpammJgYDR8+XH5+fpo6dao2bNigu+66S5999plGjRqlzMxMDR48WJdccokkqVWrVho8eLBmzpypZ555Rp9++qlSUlJ08OBBr6/wBg4cqDZt2mjIkCF666239M4776h///4KCgryujfSpEmTVFpaqpSUFGVmZmr27NmaMmWK4uLi1KtXr6r+KAAAQB1V5Sdpu1wurVmzRk888YRWrFihf//732rcuLFuu+02vfTSS/bJ0vfdd5/y8vI0ZcoU+2aOffr00auvvuo13rx582SM0dNPPy3pxJGgxx9/XEOHDvXaZ2ZmpgYOHGivb9KkiRYtWqT4+Hi7rl27dnr//fd17733qk+fPpKk+Ph4LVu2TEFBQVX9UQAAgDrKMs5LyWrYTz/9pKysLEVFRSkxMfGUdd9884127typhIQEtWjRotIaY4zWr1+v/Px8de7cWRdddFGldUVFRVq7dq0CAgLUtWtXnxtMnk5BQYHcbrcOHz581id3c18WnG/q87/J+jw3AGfvbP5+V8uNIs9GRESEbrnlltPWtW3bVm3btv3FGsuydP311592LJfLpR49epxxjwAA4MJSLTeKBAAAqMsISAAAAA4EJAAAAAcCEgAAgAMBCQAAwIGABAAA4EBAAgAAcCAgAQAAOBCQAAAAHAhIAAAADgQkAAAABwISAACAAwEJAADAgYAEAADgQEACAABwICABAAA4EJAAAAAcCEgAAAAOBCQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHAgIAEAADgQkAAAABwISAAAAA4EJAAAAAcCEgAAgAMBCQAAwIGABAAA4EBAAgAAcCAgAQAAOBCQAAAAHAhIAAAADgQkAAAABwISAACAAwEJAADAgYAEAADgQEACAABwICABAAA4EJAAAAAcCEgAAAAOBCQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHAgIAEAADhUe0DKy8tT06ZN1bVrVxljvLZ9
8MEH6tixo4KDg3XllVdq5cqVPu/3eDxKS0tTbGysXC6Xevbsqd27d1e6n2HDhikyMlJut1vDhw9XUVGRT922bduUkpIit9ut6OhozZo1q8rmCgAA6odqD0gPPfSQ8vPzNX/+fFmWZa9funSp+vXrp7KyMk2fPl1xcXHq3bu3tmzZ4vX+1NRUTZo0SZdffrnS0tKUm5ur7t276+jRo3ZNcXGxevTooQULFujOO+/Uk08+qffff1/33nuv11g7d+5UUlKSsrKyNHHiRA0aNEjjx4/Xa6+9Vr0fAgAAqFMCqnPwjIwMLV26VNOmTVNcXJy9/ujRoxo9erRatGihdevWKTQ0VGPGjFFycrLGjh2rVatWSZI2bNigefPm6Y477lB6erosy9LgwYMVGxurGTNm6KmnnpIkzZkzR5s2bdKcOXOUmpoqSerUqZO6deumUaNGKSkpSZI0btw4FRYWKisrSx06dJAkuVwuTZgwQXfffbdCQkKq8+MAAAB1RLUdQTp48KBGjhyphIQETZgwwWvb6tWrlZOTo9TUVIWGhkqSLMvSiBEjtGbNGuXm5kqS0tPTJUmTJ0+2jz6FhYVpwIABysjIsMdLT09XWFiYhg8fbq9LTk5WXFycXVdQUKAVK1YoJSXFDkeSNHLkSBUWFmrFihXV8CkAAIC6qNoC0ogRI5Sbm6thw4bpww8/1D//+U9729atWyWdCDEnS0xMlMfj0ebNm+06t9vtFWgq6rZv367jx4/bdV26dFGDBg186jZt2iRJ2rFjh8rLy332GRkZqaioKLuuMiUlJSooKPBaAABA/VUtAWn58uV67733JEkTJ07UkCFD1KlTJyUnJ+vAgQPKz8+XJMXExHi9LyIiQpK0Z88eSVJ+fr5atmzpde5SRZ3H49G+fftUVFSkkpISn7Eq6k4eq7J9OusqM336dLndbntp1qzZ6T8EAABQZ1VLQJo4caIk6eWXX1Z+fr4OHz6sd999V+vXr9ftt98uj8cjSWrUqJHX+4KDgyVJhw4dknTiCjZnjbPuVGNV1J081pnUVWby5Mk6fPiwvWRnZ5+yFgAA1H1VfpL2zp079c0336hr164aP368vb5fv3568MEHNWfOHEVHR8uyLJ/L/iteFxcXSzoRXCqCzanqXC6XzxGmk+tOHutUTq6rTGBgoAIDA0+5HQAA1C9VfgQpLy9PknTTTTf5bGvbtq29zRjjcyQmJydHkuwTt6Ojo7Vv3z6fcU6u8/PzU2Rk5CnrTh5L0mnrAAAAqjwgVQSRyo7q/Pjjj5L+X1Bau3at1/asrCxJUtOmTSVJHTp0UHZ2tvbu3XvaunXr1vnsLysry66JjY1VaGiozz7z8/O1a9cuuw4AAKDKA9Jll12mNm3aKD09XaWlpfb6Y8eOaeHChYqOjlbXrl3VvHlzzZ07V2VlZXbN/Pnz5e/vb19p1q9fP0nS7NmzvcZZsmSJEhISFB4eLknq37+/du/ereXLl9t1Gzdu1Ndff63u3btLkvz9/dW3b19lZGTohx9+sOtef/11GWPsOgAAgGo5SfuVV17RN998o+uuu06zZ8/WSy+9pMTERO3Zs0dpaWny8/PT1KlTtWHDBt1111367LPPNGrUKGVmZmrw4MG65JJLJEmtWrXS4MGDNXPmTD3zzDP69NNPlZKSooMHD2rs2LH2/gYOHKg2bdpoyJAheuutt/TOO++of//+CgoK8ro30qRJk1RaWqqUlBRlZmZq9uzZmjJliuLi4tSrV6/q+CgAAEAdVC130r755pu1evVqvfjii3rhhRdUWFio1q1b6+2339aAAQMkSffdd5/y8vI0ZcoU+2aOffr00auvvuo11rx582SM0dNPPy3pxJGgxx9/XEOHDrVrXC6XMjMzNXDgQHt9kyZNtGjRIsXHx9t17dq1sx9B0qdPH0lSfHy8li1bpqCgoOr4KAAAQB1kGeelZDXsp59+UlZWlqKiopSYmHjKum+++UY7d+5UQkKCWrRo
UWmNMUbr169Xfn6+OnfurIsuuqjSuqKiIq1du1YBAQHq2rWrzw0mT6egoEBut1uHDx8+65O7rdTFZ1X/a5m5g2p0f6h76vO/yfo8NwBn72z+flfrs9jOREREhG655ZbT1rVt29Y+uftULMvS9ddff9qxXC6XevToccY9AgCAC0u1PWoEAACgriIgAQAAOBCQAAAAHAhIAAAADgQkAAAABwISAACAAwEJAADAgYAEAADgQEACAABwICABAAA4EJAAAAAcCEgAAAAOBCQAAACHgNpuAKgLrNTFNbo/M3dQje4PAOCNI0gAAAAOBCQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHAgIAEAADgQkAAAABwISAAAAA4EJAAAAAcCEgAAgAMBCQAAwIGABAAA4EBAAgAAcCAgAQAAOBCQAAAAHAhIAAAADgQkAAAABwISAACAAwEJAADAgYAEAADgQEACAABwICABAAA4EJAAAAAcCEgAAAAOBCQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHAgIAEAADgQkAAAABwISAAAAA4EJAAAAIcaCUhHjhxRq1atNHToUK/1H3zwgTp27Kjg4GBdeeWVWrlypc97PR6P0tLSFBsbK5fLpZ49e2r37t0+dXl5eRo2bJgiIyPldrs1fPhwFRUV+dRt27ZNKSkpcrvdio6O1qxZs6pqmgAAoJ6okYD0yCOP6LvvvvNat3TpUvXr109lZWWaPn264uLi1Lt3b23ZssWrLjU1VZMmTdLll1+utLQ05ebmqnv37jp69KhdU1xcrB49emjBggW688479eSTT+r999/Xvffe6zXWzp07lZSUpKysLE2cOFGDBg3S+PHj9dprr1Xf5AEAQJ0TUN07+OijjzR//nxZlmWvO3r0qEaPHq0WLVpo3bp1Cg0N1ZgxY5ScnKyxY8dq1apVkqQNGzZo3rx5uuOOO5Seni7LsjR48GDFxsZqxowZeuqppyRJc+bM0aZNmzRnzhylpqZKkjp16qRu3bpp1KhRSkpKkiSNGzdOhYWFysrKUocOHSRJLpdLEyZM0N13362QkJDq/jgAAEAdUK0BKScnRw888ID69u2rr7/+2l6/evVq5eTkaNKkSQoNDZUkWZalESNGaNCgQcrNzdUll1yi9PR0SdLkyZPtgBUWFqYBAwYoIyPDDkjp6ekKCwvT8OHD7X0kJycrLi5OGRkZSkpKUkFBgVasWKHevXvb4UiSRo4cqWeffVYrVqxQ//79q/PjAIAqZaUurrF9mbmDamxfwPmgWr9ie/DBB2VZlubPn++1fuvWrZJOhJiTJSYmyuPxaPPmzXad2+32CjQVddu3b9fx48ftui5duqhBgwY+dZs2bZIk7dixQ+Xl5T77jIyMVFRUlF1XmZKSEhUUFHgtAACg/qq2gPTGG2/o/fff1xtvvKHw8HCvbfn5+ZKkmJgYr/URERGSpD179th1LVu29Pp6rqLO4/Fo3759KioqUklJic9YFXUnj1XZPp11lZk+fbrcbre9NGvW7NQTBwAAdV61BKS9e/dq7NixGj16tHr16uWz3ePxSJIaNWrktT44OFiSdOjQIbvOWeOsO9VYFXUnj3UmdZWZPHmyDh8+bC/Z2dmnrAUAAHVflZ+D5PF4dM8996hZs2Z68cUXK60JDg6WZVkyxnitr3hdXFxs11UEm1PVuVwunyNMJ9edPNapnFxXmcDAQAUGBp5yOwAAqF+q/AjSK6+8ov/7v//T7NmzdeTIEeXm5io3N1cej0clJSXKzc1VVFSUjDE+R2JycnIkyT5xOzo6Wvv27fPZx8l1fn5+ioyMPGXdyWNJOm0dAABAlQekjz76SOXl5brpppsUHh5uL9nZ2Xr77bcVHh5un+S8du1ar/dmZWVJkpo2bSpJ6tChg7Kzs7V3797T1q1bt86nl6ysLLsmNjZWoaGhPvvMz8/Xrl277DoAAIAqD0gzZszQp59+6rNERESoR48e+vTT
TzVw4EA1b95cc+fOVVlZmf3e+fPny9/f377SrF+/fpKk2bNn2zXHjh3TkiVLlJCQYJ/83b9/f+3evVvLly+36zZu3Kivv/5a3bt3lyT5+/urb9++ysjI0A8//GDXvf766zLG2HUAAABVfg5SYmJipeuDgoLUtGlT3XTTTZKkqVOn6v7779ddd92lESNG6J133lFmZqaGDBmiSy65RJLUqlUrDR48WDNnzpTb7db111+v559/XgcPHlRaWpo99sCBA/Xiiy9qyJAhmjVrlho1aqRHHnlEQUFBXvdGmjRpkjIyMpSSkqLp06dr165dmjJliuLi4io9mRwAAFyYqv1O2qdy3333KS8vT1OmTFFGRoYkqU+fPnr11Ve96ubNmydjjJ5++mlJJ44EPf74417PdXO5XMrMzNTAgQPt9U2aNNGiRYsUHx9v17Vr185+BEmfPn0kSfHx8Vq2bJmCgoKqb7IAAKBOqbGAVNl9hh577DHdc889ysrKUlRUVKVHn0JCQrRw4UI98cQT2rlzpxISEtSiRQufulatWumrr77S+vXrlZ+fr86dO+uiiy7yqevZs6e+++47rV27VgEBAeratavPDSYBAMCFrdaOIFWIiIjQLbfcctq6tm3bqm3btr9YY1mWrr/++tOO5XK51KNHjzPuEQAAXFiq9VEjAAAAdREBCQAAwIGABAAA4EBAAgAAcCAgAQAAONT6VWwAADhZqYtrbF9m7qAa2xfqDo4gAQAAOBCQAAAAHAhIAAAADgQkAAAABwISAACAAwEJAADAgYAEAADgQEACAABwICABAAA4EJAAAAAcCEgAAAAOBCQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHAgIAEAADgQkAAAABwISAAAAA4EJAAAAAcCEgAAgAMBCQAAwIGABAAA4EBAAgAAcCAgAQAAOBCQAAAAHAhIAAAADgQkAAAABwISAACAAwEJAADAgYAEAADgQEACAABwICABAAA4EJAAAAAcCEgAAAAOBCQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHAgIAEAADgQkAAAAByqLSDNnTtXbdq0UYMGDdS4cWMNGDBABw8e9Kr54IMP1LFjRwUHB+vKK6/UypUrfcbxeDxKS0tTbGysXC6Xevbsqd27d/vU5eXladiwYYqMjJTb7dbw4cNVVFTkU7dt2zalpKTI7XYrOjpas2bNqrI5AwCA+qFaAlJaWppGjx6t2NhYzZkzR8OHD9c777yjXr16qby8XJK0dOlS9evXT2VlZZo+fbri4uLUu3dvbdmyxWus1NRUTZo0SZdffrnS0tKUm5ur7t276+jRo3ZNcXGxevTooQULFujOO+/Uk08+qffff1/33nuv11g7d+5UUlKSsrKyNHHiRA0aNEjjx4/Xa6+9Vh0fAwAAqKMCqnrAnJwcPfPMM3rkkUf0yiuv2OsDAwP1wgsvaP369erQoYNGjx6tFi1aaN26dQoNDdWYMWOUnJyssWPHatWqVZKkDRs2aN68ebrjjjuUnp4uy7I0ePBgxcbGasaMGXrqqackSXPmzNGmTZs0Z84cpaamSpI6deqkbt26adSoUUpKSpIkjRs3ToWFhcrKylKHDh0kSS6XSxMmTNDdd9+tkJCQqv44AABAHVTlR5COHDmiJ554Qs8++6zX+t/+9reSTgSo1atXKycnR6mpqQoNDZUkWZalESNGaM2aNcrNzZUkpaenS5ImT54sy7IkSWFhYRowYIAyMjLssdPT0xUWFqbhw4fb65KTkxUXF2fXFRQUaMWKFUpJSbHDkSSNHDlShYWFWrFiRRV/EgAAoK6q8oDUsmVLPfHEE2rUqJHX+q+++kqSlJCQoK1bt0o6EWJOlpiYKI/Ho82bN0uStm7dKrfb7RVoKuq2b9+u48eP23VdunRRgwYNfOo2bdokSdqxY4fKy8t99hkZGamoqCi7DgAAoEauYsvLy9OCBQuUnJysmJgY5efnS5JiYmK8
6iIiIiRJe/bskSTl5+erZcuW9tGjk+s8Ho/27dunoqIilZSU+IxVUXfyWJXt01lXmZKSEhUUFHgtAACg/qqRgDRmzBgVFBTo5ZdflnTiyjRJPkeZgoODJUmHDh2y65w1zrpTjVVRd/JYZ1JXmenTp8vtdttLs2bNTlkLAADqvmoPSPPnz9ff/vY3TZ06VR07dpR0IpBYliVjjFdtxevi4mK7rjIn17lcLp8jTCfXnW4sZ11lJk+erMOHD9tLdnb2KWsBAEDdV60B6auvvlJqaqp69+6tJ5980l4fHR0tY4xP0MjJyZEk+8Tt6Oho7du3z2fck+v8/PwUGRl5yrqTx5J02rrKBAYGKjQ01GsBAAD1V7UFpO+//1633nqrYmJitGTJEq+jPBUnXa9du9brPVlZWZKkpk2b2nXZ2dnau3fvaevWrVvn00NWVpZdExsbq9DQUJ995ufna9euXXYdAABAtQSk/fv366abbpLH49FHH33kc8QlMTFRzZs319y5c1VWVmavnz9/vvz9/e0rzfr16ydJmj17tl1z7NgxLVmyRAkJCQoPD5ck9e/fX7t379by5cvtuo0bN+rrr79W9+7dJUn+/v7q27evMjIy9MMPP9h1r7/+uowxdh0AAECV3yhSkgYNGqTdu3crNTVVX3zxhb744gt7W/v27dW+fXtNnTpV999/v+666y6NGDFC77zzjjIzMzVkyBBdcsklkqRWrVpp8ODBmjlzptxut66//no9//zzOnjwoNLS0uwxBw4cqBdffFFDhgzRrFmz1KhRIz3yyCMKCgryujfSpEmTlJGRoZSUFE2fPl27du3SlClTFBcXp169elXHRwEAAOqgKg9IP//8s1avXi3pxPPY5s6d67V96tSpat++ve677z7l5eVpypQp9s0c+/Tpo1dffdWrft68eTLG6Omnn5Z04kjQ448/rqFDh9o1LpdLmZmZGjhwoL2+SZMmWrRokeLj4+26du3a2Y8g6dOnjyQpPj5ey5YtU1BQUBV+CgAAoC6r8oAUFhbmc3XaqTz22GO65557lJWVpaioKCUmJvrUhISEaOHChXriiSe0c+dOJSQkqEWLFj51rVq10ldffaX169crPz9fnTt31kUXXeRT17NnT3333Xdau3atAgIC1LVrV58bTAIAgAtbtXzFdjYiIiJ0yy23nLaubdu2atu27S/WWJal66+//rRjuVwu9ejR44x7BAAAF5YauVEkAABAXUJAAgAAcCAgAQAAOBCQAAAAHAhIAAAADgQkAAAABwISAACAAwEJAADAgYAEAADgQEACAABwICABAAA4EJAAAAAcCEgAAAAOBCQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHAgIAEAADgQkAAAABwISAAAAA4EJAAAAAcCEgAAgAMBCQAAwIGABAAA4EBAAgAAcCAgAQAAOBCQAAAAHAhIAAAADgQkAAAABwISAACAAwEJAADAIaC2GwAA4EJipS6usX2ZuYNqbF/1DUeQAAAAHAhIAAAADgQkAAAABwISAACAAwEJAADAgYAEAADgQEACAABwICABAAA4EJAAAAAcCEgAAAAOBCQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHC4IAPStm3blJKSIrfbrejoaM2aNau2WwIAAOeRgNpuoKbt3LlTSUlJsixLEydO1KFDhzR+/HgFBQVpxIgRtd0eAAA4D1xwAWncuHEqLCxUVlaWOnToIElyuVyaMGGC7r77boWEhNRyhwAA1E1W6uIa25eZO6hax7+gvmIrKCjQihUrlJKSYocjSRo5cqQKCwu1YsWKWuwOAACcLy6ogLRjxw6Vl5crOTnZa31kZKSioqK0adOmWuoMAACcTy6or9jy8/MlSTExMT7bIiIitGfPnkrfV1JSopKSEvv14cOHJZ04InXWjh87+/f8CufUI3zV558bc6syNf77VoPzY25ViLlViXOZW8V7jDGnLzYXkI8++shI
MitXrvTZ1rlzZ3PzzTdX+r6pU6caSSwsLCwsLCz1YMnOzj5tZrigjiAFBwefcpsxRsXFxZVumzx5ssaNG2e/9ng8+vnnn3XxxRfLsqwq79OpoKBAzZo1U3Z2tkJDQ6t9fzWJudVNzK1uYm51V32eX03OzRijwsJCRUVFnbb2ggpI0dHRkqR9+/b5bMvJydEVV1xR6fsCAwMVGBjote6iiy6q8v5OJzQ0tN79YlRgbnUTc6ubmFvdVZ/nV1Nzc7vdZ1R3QZ2kHRsbq9DQUK1du9ZrfX5+vnbt2qWmTZvWUmcAAOB8ckEFJH9/f/Xt21cZGRn64Ycf7PWvv/66jDHq3r17LXYHAADOFxdUQJKkSZMmqbS0VCkpKcrMzNTs2bM1ZcoUxcXFqVevXrXdXqUCAwM1depUn6/56gPmVjcxt7qJudVd9Xl+5+vcLGPO5Fq3+mXFihW69957deDAAUlSfHy8li1bpnbt2tVyZwAA4HxwQQYkSSoqKtLatWsVEBCgrl27qkGDBrXdEgAAOE9csAEJAADgVC64c5AAAABOh4AEAADgcEHdKLIuKisrU0FBgYqLixUaGqpGjRrVdks4A/X558bc6ibmVnfV1/md7/PiCNJ5aNGiRbrtttvUvHlzBQUFKTw8XM2aNZPb7VZoaKhuv/12rVmzprbbrBJlZWX6+eeftX//fh05cqS22/lV6vPPjbkxt/NNfZ6bVH/nV5fmxUna55G8vDzdeuut+uqrr3TTTTcpMTFRERERCg4OljFGOTk5+uqrr7RixQqVlpZq7ty5GjFiRG23fdYWLVqkjIwMbdq0Sfv37/d6qnJISIi6d++uMWPG6IYbbqjFLs9cff65MTfmdr6pz3OT6u/86uS8Tvs4W9SYO++807Ru3dp8++23v1j33//+11xxxRUmJCTE/PTTTzXU3a+Xm5trOnfubAICAkyvXr3ME088YV599VUzf/5885e//MW88MIL5tZbbzVBQUHG39/fzJs3r7ZbPiP1+efG3Jjb+aY+z82Y+ju/ujgvAtJ5JCQkxMyePfuMajMzM41lWebDDz+s5q6qTl38BTkT9fnnxtxOYG7nj/o8N2Pq7/zq4rw4B+k8EhYWpn/9619nVLt7925ZlqVmzZpVc1dVJzMzU6mpqWrVqtUv1kVHRystLU3Hjh1TVlZWDXV37urzz425ncDczh/1eW5S/Z1fnZxXrcYzeHnssceMn5+fmTBhgsnNza20pqioyMyZM8cEBQWZlJSUGu7w12nWrJkZOnToGdXOmTPH+Pn5mc2bN1dzV79eff65MTfmdr6pz3Mzpv7Ory7Oi5O0zyPFxcW6/fbb9fHHH8vf319XXHGFYmJi7Esf9+3bp02bNqmwsFBJSUnKyMjQJZdcUstdn7kJEyZoxowZevTRRzVhwgRdfPHFPjXFxcWaP3++HnvsMSUnJ+vvf/97LXR6durzz425MbfzTX2em1R/51cX50VAOg8tW7ZM6enp2rZtm/Lz8+XxeBQcHKzo6Gh16NBB/fv3V7du3Wq7zbNWF39BzkZ9/blJzI25nX/q89yk+ju/ujQvAhJqXF36BQEAXJgISAAAAA5cxQYAAOBAQAIAAHAgIAEAADgQkAAAABwISAAAAA4EpDrqnXfe0aWXXlrbbeAs1eefG3Orm5hb3VVf53e+zIuAVEe53W61a9euttuoNufLL0hVq88/N+ZWNzG3uqu+zu98mRf3QcJ5aeXKlZo2bZo+//zz2m4FF6jjx4/r7bffVs+ePRUREVHb7eAMfPPNN/rhhx8UERGhK664orbbQR1HQDoPFRUVacWKFdq0aZP27NmjQ4cOqbi4WKGhoWratKl69Oihnj17qmHDhrXdKv5/xhitX79eOTk5SkhIUIsWLSqt27p1q9577z099dRTNdvgr3D48GGtXbtW/v7+6ty5s0JDQyWd+GP07rvvqrS0VF27dq13dz/Py8vTpZdeqpUrVyo5Obm22zkrW7Zs0WWX
XSa32+21ftWqVfrkk0+0f/9+XXrpperbt6+SkpJqqcuq8+OPP+qOO+7Q+vXr7XW/+c1vlJaWpt///ve12NnZ8/f3V+vWrfXiiy+qb9++td1OlSorK9O6desUEBCgq6++Wg0bNpQxRn/961+1Zs0alZWVqVOnTnrwwQcVFBRU2+0SkM43f/7znzVhwgQVFBQoKipKERERCg4OljFGOTk52rVrlySpbdu2ysjIUHx8fC13jJycHKWkpOhf//qXva5bt256+eWXlZCQ4FW7ePFi3XPPPSovL6/pNs/JunXr1K9fP/38888yxqhJkybKyMjQsWPHdPvtt+v48eOSJMuydMstt2jp0qUKDAys5a7PzH333feL20tKSrRkyRL17NlTTZs2lWVZev3112uou1/H399fCxcu1MCBAyVJhw4d0s0336wvv/xSJ/8n37IsDR06tM7M61S6deum9evX68EHH1Tbtm114MABLVy4UPv27dPSpUt1xx131HaLZ8zPz0+tW7fWt99+q6SkJE2fPl3XXXddbbf1qx04cEDJycn69ttvJUktWrTQqlWr9Nxzz2nBggVq2LChPB6PysvLFRsbq3/84x+KiYmp3aYNzhuzZs0y/v7+ZurUqebHH3+stCY/P9+89NJLxuVymSuuuMKUlZXVcJdwGjBggAkICDBPPPGEWbZsmRk/frwJCwszDRo0MNOmTfOqXbRokfHz86ulTs9emzZtTExMjFm+fLn5xz/+YXr16mXCwsJM06ZNTc+ePc2uXbvMoUOHzOuvv26CgoLM888/X9stn7HY2FhjWZbx9/c3LVq08FmaN29u/Pz8TGRkpGnRooVp2bJlbbd8xizLMosXL7Zfjx071vj5+Zlp06aZnJwcc+zYMbNu3Tpzww03GD8/P/PGG2/UXrNnKSkpyXz77bf263/+85/Gsizz/vvve9UdPXrUtG3b1vz2t7+t6RZ/lYqf3fr1602XLl2Mn5+fue6668y7775rPB5Pbbd3zoYMGWJCQ0PNggULzKJFi0zr1q1Nx44djb+/v5k2bZopLS01hYWFZuLEicayLHP77bfXdsuGgHQeiY6ONhMnTjyj2lmzZhk/Pz/z+eefV29TOK3GjRubxx57zGtdXl6eGThwoLEsy9xyyy2msLDQGFO3AtLevXuNZVnm73//u73u+PHjpnXr1qZRo0YmLy/Pq37y5MmmTZs2Nd3mOTty5Ih54IEHjGVZ5rbbbjO5uble23NycoxlWeazzz6rpQ7PnTMgNWvWzDzwwAM+dcXFxaZNmzbm2muvrcn2fpXIyEjjcrnMH/7wB1NeXm7ee+8943a7K6198cUXTUhISM02+Cs5f3Yff/yxSU5ONpZlmaioKDNu3DizYcOGWuzw3ERFRZlJkybZr7OysoxlWebqq6/2qW3ZsqW5+OKLa7K9SgXU7vErnOzIkSNq0qTJGdW2bt1a0onvdOuK5ORkWZZ1xvWWZemzzz6rxo6qRnFxsVq1auW1LiwsTIsXL1ZycrJGjx6t6667Th9++GEtdXhuKv5tNW3a1F7XoEED9evXT5s3b1ZYWJhX/W9+8xvt37+/Rnv8NUJCQvSXv/xFt956q4YNG6aEhAS98cYb6t69uySd1b/V811OTo6uvvpqn/WBgYHq3bt3nfqKbfv27Ro1apQmT56s9PR0PfDAA/ZXM/7+/l61gYGBaty4cS11WjV69eqlXr166Z///Kfmzp2r+fPna9asWYqIiNANN9ygG264QSNGjKjtNk/r559/Vps2bezXV155pST5nIYgSXFxcVq1alWN9XYqXOZ/Hrnuuuv08ssva8uWLb9Yd+DAAU2ePFlNmzZV165da6i7X+/yyy/X6tWrtX37dpkTRy9/cfF4PLXd8hlp2bKlli9fXum2Bx54QKtXr1ZOTo6uvfZaZWVl1XB3565ly5YKDw/XW2+95bV+ypQpWrJkiU99enq62rZtW1PtVZk+ffpo69atuuqqq9SrVy89
8sgj9rlVddn333+v4uJiSVKrVq20e/fuSusKCwvrzHlj0on/+Xj77beVnp6u7Oxspaam6ujRo5ozZ45XXVlZmRYuXKhrr722ljqtWomJiXrjjTf0448/6q9//auuueYaffzxx3rooYdqu7UzEhUVpe+++85+vXHjRknSjh07fGorLiKodbV27Ao+tm3bZpo0aWIaNmxobrvtNjNr1iyzfPlys2rVKrNq1Srz5ptvmvvvv9+43W7jcrnMJ598Utstn7W0tDTjcrnM//3f/9V2K1Vmzpw5xrIs8+ijj5qjR49WWrN7924TExNj/Pz86sxXbMYYs3jxYuPn52f69Oljdu3aVWnNv//9b3PzzTcbPz8/k5GRUcMdVq0FCxYYt9tt2rdvb1atWlVnv2ILCQkxlmWZBg0amGuuucbEx8eb8PBw891333nVff7556Zx48ZmyJAhtdPor3Tw4EHTr18/Y1mW8fPzMzfeeKN57rnnzPPPP2/at29vGjZsaL744ovabvOsOL9i+yVlZWVm7dq11dxR1Zg0aZIJCQkxTz31lElLSzNNmzY1CQkJxrIs8/zzz5vCwkKTl5dnZsyYYSzLMvfff39tt8w5SOebnTt3mt69e5uAgABjWZb9i+/n52csyzJut9vcfffdZvv27bXd6jkbPny4ad68uSkoKKjtVqrMH/7wBxMcHGxeeOGFU9bs37/f3HjjjXUqIBljzKpVq8zVV19tPv3000q3P/nkkyY8PNwsWLCghjurHnv27DFJSUkmICDA+Pn51cmAVF5ebjZv3mzmzZtnhgwZYtq0aWP/Iapwzz33GMuyTJs2bcyBAwdqsdtf74MPPjBt27a1/5tpWZaJi4szmZmZtd3aWTubgFSXlJaWmhEjRpgGDRoYy7JMYmKi+fHHH82zzz5r/42r+DvXpk2bU16oVJO4zP88VVhYqB07dig/P18ej0fBwcGKjo5WbGys/Pzq9jejZWVl+uKLLxQfH6/w8PDabqfK5Obm6siRI6e8B1KF/Pz8Mz7X7HxijKn0vJz8/HyFhob6nP9RlxljNGPGDGVmZuqVV15Rhw4darulXy03N1elpaX2OWWLFy/W0aNHdffdd8vlctVyd1XjP//5j/Ly8hQdHa3mzZvXdjvnZO/evQoPD1dwcHBtt1Itjh07piNHjnh9hfbvf/9bn3zyiYwxiomJUY8ePbgPEgAAwPmobh+KAAAAqAYEJAAAAAcCEgAAgAMBCQAAwIGABAAA4EBAAgAAcCAgAQAAOBCQANSo999/X9dcc40aNWqkpk2batCgQTpw4IC93ePx6Nlnn1Xz5s3ldrt122236bXXXlPr1q2VnJxs1x08eFDDhg1TZGSkGjVqpOuuu06ffPLJOfW0Z88eWZalN998UwcPHtSoUaPUvHlzPf/88151x44d06RJk9SyZUu5XC7FxcVp5syZXjVDhw5VixYtdOjQId1zzz0KCwtTeHi4Ro0a5fOMt3feeUdXXnmlQkJCdNVVV+mtt95Sly5dvG66WVZWpmnTpqlVq1ZyuVy6/PLL9eqrr9aZZxUCdVVAbTcA4MLxj3/8Q/3791fXrl31yiuv6NChQ3r55Zc1aNAg++nds2fP1tSpU/Xoo4+qVatWmjt3riZNmqS5c+faT2Y/dOiQOnfurNLSUo0bN05ut1tLlixRr1699PHHH6tnz57n1N+BAweUmJiogIAA3XTTTWrfvr3X9gceeEBLly7V2LFjFR8fr6ysLI0bN04XX3yx7rnnHruutLRUKSkpCg4O1vTp05WZmal58+YpNjZW48ePl3TiYZ0DBgxQnz59NGbMGGVmZmro0KGaPn26hgwZYo/1+9//Xh9//LHGjBmj2NhYrV+/Xg8//LB+/PFHvfDCC+c0TwBnoBYfcwLgAjNz5kzTr18/r+fwpaWlGcuyTGFhoTHGmFtuucV07NjR3r58+XIjyXz11Vf2uqeeesr4+/ubjRs3mpycHJOTk2P++9//msaN
G5uUlJSz7uv77783kkxwcLBJTU015eXlPjUlJSXm9ttv93neXnx8vPmf//kf+/WQIUOMJNOrVy97nOPHj5uIiAhz880323UzZswwkuzPoqCgwEgyaWlpds2qVauMJPO///u/9jxzcnJMcnKyady4sSkrKzvruQI4MxxBAlBjxo4dq7Fjx+rAgQP65JNP9OWXX2rRokUyxig3N1eNGjVSYmKiVq1apc8//1yxsbFKT0+Xv7+/LrvsMnucTz75ROXl5erUqZPPPrZt23bO/bVq1UqzZs2q9HmHDRs2VEZGhjwej77++mutX79ea9as0c6dO72eK1XhpZdessdp0KCB2rRpoyNHjtjbExMTJUlvvvmm+vXrp/fee0+SFBsb6zVPSXrooYf00EMP+exj7969iomJOef5Ajg1AhKAGrNt2zYNGzZMX375pUJDQ5WYmKirrrpKH374oV3To0cPvfDCC+rWrZskKSQkRH/6058UERFh1+Tm5iomJkZ/+tOffPbRoEGDc+5v+PDhv/jQ3ddee01TpkxRbm6uLrvsMnXu3LnShxM3atRIV1xxhdc6Z+hq3769oqOj9cgjj2jMmDGyLEvDhw9X//797Zrc3FxJ0nvvvadGjRr57KeyYAagahCQANSYfv366fjx48rKylKnTp3sE6MrApIxRr///e/16quv6ne/+51yc3MVHx8vt9vtNc7FF1+s77//XjfddJPX+v/85z86dOjQOff3S09QX7dunUaOHKl77rlHL7/8ssLDwyVJN954o0/txRdffNp9Pfzww+ratatmz56tb7/9Vi1btlRUVFSl48TFxSk+Pt5e/+OPP2rPnj0yPGscqDZcxQagRuTm5mrXrl1KSUnRVVddJcuyVFJSooULF9o1xcXFys7OVmBgoFq1aqVrr73WJxxJJ44yHTx40OuqtZKSEt10000aOXJktfT/5ZdfSpJGjhxph6MtW7Zow4YN5zTet99+q8DAQF166aXq3LmzTziSTsxTktdnJJ0IV126dFFZWdk57RvA6XEECUCNuPjiixUVFaWlS5cqLi5OZWVlevPNN/Xtt99KkoqKiuRyuZSQkKCJEydq7969uuyyy9SoUSNFRkYqMTFRLpdLkvTII4/ob3/7m2677TaNHj1aMTExWrp0qf773/9q7ty51dJ/xRVtEydO1N13363t27frz3/+s0pLS1VUVHTW41177bWaO3euQkNDdeWVVyo0NFRhYWHq2LGjfeSoW7duuv322zV9+nTt27dPSUlJ2rhxo9LT0zV+/Hg1adKkSucI4CS1fJI4gAvIxo0bTdeuXU1wcLC59NJLzYMPPmjS09ONJPPSSy8Zj8djpk2bZm8PCAgwkowkEx4ebrZs2WKP9dNPP5n777/fhIeHm5CQEHPttdeav//97+fUV8VVbG+88cYv1s2aNcu0bNnSNGzY0LRr1878+c9/NoMHDzZBQUEmOzvbGHPiKrbLLrvM57033HCDueGGG+zXa9asMQ0bNjRRUVEmMDDQnmeDBg3MwoUL7brS0lLz3HPPmZiYGBMYGGji4uLMzJkzuYINqGaWMXyJDeD88MYbb2jYsGF64okn1KJFCwUEBKikpET/+te/9Mc//lF//OMfq+0rtJq0a9cuxcXFaeDAgbrhhhsUGBio8vJyZWdn66WXXlJKSoqWLl1a220CFzQCEoDzRm5uriZPnqzPPvtM+/fvV2lpqUJDQ9W2bVv17dtXjz766BlfpZaTk6Py8vJfrAkKCtJFF11UBZ2fHY/Ho2nTpik9PV179uzR0aNHFRwcrMsuu0y/+93v9PjjjysyMrLG+wLw/xCQANRLLVq00N69e3+x5uabb9ZHH31UQx0BqEs4SRtAvbR48eLTnjxdcTUaADhxBAkAAMCB+yABAAA4EJAAAAAcCEgAAAAOBCQAAAAHAhIAAIADAQkAAMCBgAQAAOBAQAIAAHD4/wB8SAOeaKeCMwAAAABJRU5ErkJggg=="
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 用户年龄分布\n",
    "tags = data_user_info.age_range.value_counts().sort_index()\n",
    "tags.plot.bar()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:27.091350300Z",
     "start_time": "2024-09-24T14:04:26.948473300Z"
    }
   },
   "id": "d65ebaaa1d99edd2",
   "execution_count": 14
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "<Axes: xlabel='gender'>"
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": "<Figure size 640x480 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkgAAAHBCAYAAACSSUQiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA8EElEQVR4nO3df1iUdb7/8deAxg9xRjEDYf0FJpkai2ydbVUMT5gYx1bz5GqW2daKiqV5jkJqaqdiyVol2bVz0jZPuq6Iu+mJ3YNZymKZo2vmr5MnjymsuonKLxFQmPv7hxf317kHEwxF4Pm4rrmuM/e85nO/nTMxr71n5h6bYRiGAAAAYPJq6gEAAABuNRQkAAAACwoSAACABQUJAADAgoIEAABgQUECAACwoCABAABYUJAAAAAs2jT1AM2Ry+XSyZMn1b59e9lstqYeBwAA1INhGCorK1NISIi8vL77GBEF6TqcPHlSXbt2beoxAADAdSgoKNAPfvCD78xcd0FyuVz65ptv1KZNG3Xv3v16l2mW2rdvL+nyA2y325t4GgAAUB+lpaXq2rWr+Tr+nYzrsGbNGqNz586GJEOSceeddxp/+ctf3DIffPCBERUVZfj5+Rn9+vUzPvroI491ampqjF/+8pdGWFiY4evrawwbNsz4v//7P4/cmTNnjGeeecYICgoy7Ha78Ytf/MK4cOGCR27//v3G8OHDDbvdboSEhBhLliypc/76zPZdSkpKDElGSUlJg+4HAACaTkNevxv8Ie2tW7dqwoQJ+tnPfqa//vWv+vDDD1VdXa2f/vSnKioqkiStW7dOo0aNUnV1tVJTUxUREaERI0Zo3759bmslJSUpOTlZd999t9LS0nTmzBnFxcWpvLzczFRWVmrYsGF699139dhjj2nevHn64IMPNGnSJLe1Dh8+rJiYGDmdTs2ZM0ePP/64Zs2apbffftstV9/ZAABAK9bQ9nXvvfca8fHxbts2btxoSDI2btxonD9/3ujcubPRs2dPs6G5XC5jyJAhRmxsrHkfp9NpSDLGjBljuFwuwzAM4+zZs0aHDh2MRYsWmbnXX3/dkGQsW7bM3PbJJ58Ykozc3Fxz24gRI4w2bdoYe/bsMbe99NJLRvv27Y3z588bhmHUe7Zr4QgSAADNzw07guRyufTKK68oPT3dbbuPj48kydvbW9u2bVNhYaGSkpLMz+fYbDYlJiYqNzdXZ86ckSRlZmZKklJSUsxvggUGBmrs2LHKysoy187MzFRgYKAmT55sbouNjVVERISZKy0tVU5OjuLj4xUVFWXmpkyZorKyMuXk5EhSvWcDAACtW4MKkpeXl4YNG6Y777zT3Hbx4kUtXbpUHTp00MCBA7V//35Jl0vMlaKjo+VyubR3715J0v79++VwONwKTW3u4MGDunjxopkbNGiQ2rZt65Hbs2ePJOnQoUOqqanx2GdwcLBCQkLMXH1nAwAArdt1nyiyrKxMc+bMUb9+/fT5559r/fr16tChg/k5pLCwMLd8UFCQJOnYsWOSpKKiIvXs2dPjPEJBQUFyuVzKz89XRUWFqqqqPNaqzV25Vl37rG/OOptVVVWVSktL3S4AAKDluu6CdPHiRX3xxRc6ceKEfHx8VFFRIeny23CSFBAQ4Jb39/eXJBUXF5s5a8aau9patbkr1/o+OetsVqmpqXI4HOaFcyABANCyXXdB6tSpkzZv3qzjx48rMjJSY8aM0aFDh+Tv7y+bzSbDMNzytdcrKysl/f9SYnVlzs/P76pnqjYM45pr1ZWrz2xWKSkpKikpMS8FBQVX3R8AAGj+vvdvsd1+++369a9/rYsXL+oPf/iDQkNDZRiGR4koLCyUJPPD0aGhocrPz/dY78qcl5eXgoODr5q7ci1J9crVZzYrHx8f2e12twsAAGi5GlSQzp07p3nz5ul//ud/3LbffvvtkqRLly6ZH7rOy8tzyzidTklSly5dJElRUVEqKCjQ
8ePHr5nbvn27xyxOp9PMhIeHy263e+yzqKhIR44ccVurPrMBAIDWrUEFyW6369///d81a9Yst7ep/vCHP0iSBg0apOjoaHXr1k0ZGRmqrq42MytWrJC3t7f5DbJRo0ZJktspAy5cuKC1a9cqMjJSnTt3liSNHj1aR48e1aZNm8zc7t279eWXXyouLk7S5dMLjBw5UllZWTpx4oSZW7lypQzDMHP1nQ0AALRyDT3J0sqVKw1JRkxMjJGRkWGkpKQYPj4+RkJCglFTU+OWGTNmjLFlyxZjypQphiRj4sSJbms98cQThiRj4cKFxubNm42YmBhDkvHb3/7WzFy4cMHo3bu30aFDB+O9994zsrKyjK5duxq+vr7GoUOHzNyBAwcMX19fo3///saHH35oLF261PD19TUiIiKMiooKj/mvNdt34USRAAA0Pw15/b6u32LLzMw0BgwYYPj7+xu9evUyFi1a5PHbaK+//rrh4+Nj/l5bQkKCx0Dnz583JkyYYGa8vb2NF1980WN/X3/9tXHvvfeauY4dOxpZWVkeuf/+7/82unTpYub69OljHDhwwCNXn9m+CwUJAIDmpyGv3zbDsHylqxF9++23cjqdCgkJUXR09FVzX331lQ4fPqzIyEj16NGjzoxhGNqxY4eKioo0cOBAdejQoc5cRUWF8vLy1KZNGw0ePNjjBJMNna0upaWlcjgcKikp4QPbAAA0Ew15/b6hBamloiABAND8NOT1+3t/zR8AAKCladPUA+DGsiWtaeoRWgQj4/GmHgEAcBNxBAkAAMCCggQAAGBBQQIAALCgIAEAAFhQkAAAACwoSAAAABYUJAAAAAsKEgAAgAUFCQAAwIKCBAAAYEFBAgAAsKAgAQAAWFCQAAAALChIAAAAFhQkAAAACwoSAACABQUJAADAgoIEAABgQUECAACwoCABAABYUJAAAAAsKEgAAAAWFCQAAAALChIAAIAFBQkAAMCCggQAAGBBQQIAALCgIAEAAFhQkAAAACwoSAAAABYUJAAAAAsKEgAAgAUFCQAAwIKCBAAAYEFBAgAAsKAgAQAAWFCQAAAALChIAAAAFhQkAAAACwoSAACABQUJAADAgoIEAABgQUECAACwuK6ClJGRod69e6tt27Zq3769xo4dq9OnT5u379ixQzabzePSoUMHt3VcLpfS0tIUHh4uPz8/PfTQQzp69KjH/s6ePatnn31WwcHBcjgcmjx5sioqKjxyBw4cUHx8vBwOh0JDQ7V06dI659+4caMGDBggf39/9e/fX1u2bLmehwEAALRQbRp6h7S0NCUnJ2v48OF64YUXdOTIES1dulRff/21du3aJW9vb+3cuVO33XabVq5c6Xbf2267ze16UlKSli9froSEBD3//PNatWqV4uLitG/fPrVr106SVFlZqWHDhmnv3r2aNm2aunbtqjfeeEMlJSX6/e9/b651+PBhxcTEyGazac6cOSouLtasWbPk6+urxMREM7du3TqNGzdO/fr1U2pqqvLy8jRixAjt3r1b99xzT0MfDgAA0ALZDMMw6hsuLCxU9+7dlZiYqF/96lfm9rlz5+q1115TXl6eBg0apLFjx+r48eP6/PPPr7rWrl27dN9992nMmDHKzMyUzWbTuXPnFB4erpkzZ+qll16SJC1evFizZ8/WsmXLlJSUJEnaunWrhg4dqtzcXMXExEiSHn74YW3evFlOp1NRUVGSpAULFmjJkiU6deqU2rVrp/LycvXs2VMBAQHau3ev7Ha7DMNQbGysvLy89Mknn9TrcSgtLZXD4VBJSYnsdnt9H74mYUta09QjtAhGxuNNPQIA4HtqyOt3g95iO3/+vObOnauXX37ZbfsPf/hDSZcLlCR9/vnnZnG5mszMTElSSkqKbDabJCkwMFBjx45VVlaWWy4wMFCTJ082t8XGxioiIsLMlZaWKicnR/Hx8WY5kqQpU6aorKxMOTk5kqRt27apsLBQSUlJ5gNjs9mUmJio
3NxcnTlzpiEPBwAAaKEaVJB69uypuXPnKiAgwG37zp07JUmRkZE6efKk8vPz9emnn6pbt27y9fVV3759tXTpUtXU1Jj32b9/vxwOh1uhkaTo6GgdPHhQFy9eNHODBg1S27ZtPXJ79uyRJB06dEg1NTWKjY11ywQHByskJMTM7d+/X5I8ctHR0XK5XNq7d29DHg4AANBCfe9vsZ09e1bvvvuuYmNjFRYWpk8//VSSVFRUpKlTp+rNN99UUFCQZs6cqblz55r3KyoqUs+ePc2jR7WCgoLkcrmUn5+viooKVVVVKSwszGO/QUFBOnbsmLmWpOvOBQUFSZKZs6qqqlJpaanbBQAAtFzfuyA999xzKi0t1RtvvCFJ6t69u1555RXt3r1bycnJmjZtmrZs2aJhw4YpIyNDJSUlki5/g816JEqS/P39JUnFxcVyuVySdNVccXGxudb3yV25z7qkpqbK4XCYl65du1718QAAAM3f9ypIK1as0O9+9zstWLBAAwYMkCTdd999mjt3rlk6JMnLy0vPPPOMysvLzSNMV95+pdrPjFdWVsrPz8/jCNOVucrKyu9cq66czWaT9XPpV+6zLikpKSopKTEvBQUFV90fAABo/q67IO3cuVNJSUkaMWKE5s2bd8187df2T548KUkKDQ1Vfn6+R672g952u11eXl4KDg6+aq72g9ahoaGSVK+cYRgeBefKfdbFx8dHdrvd7QIAAFqu6ypI33zzjR555BGFhYVp7dq1bkd55syZo8WLF3vc54svvpAk3XHHHZKkqKgoFRQU6Pjx4245p9MpSerSpYuZ2759u8d6TqfTzISHh8tutysvL88tU1RUpCNHjritJckjZ90nAABo3RpckE6ePKkHH3xQLpdLH374ocfRlPz8fKWlpbmdWTs/P1/p6elq3769HnjgAUnSqFGjJEnp6elm7sKFC1q7dq0iIyPVuXNnSdLo0aN19OhRbdq0yczt3r1bX375peLi4iRJ3t7eGjlypLKysnTixAkzt3LlShmGYeaio6PVrVs3ZWRkqLq62sytWLFC3t7eHt9uAwAArVODThQpXf6K/LZt25SUlKR/+Id/cLvtnnvukc1m03333ac77rhDI0eOVFVVlTZs2KCioiKtWLFCTz/9tJl/8skn9f7772vhwoX6yU9+oldeeUV/+ctf9Nvf/lZPPfWUJKmiokI//OEPdfr0aS1dulQBAQGaOXOmCgsLtWfPHvXp00eSdPDgQf3oRz/SnXfeqdTUVB05ckTJycnq3r279u7dK19fX0nSu+++q5///OcaM2aMEhMTtWHDBi1fvlwTJ07Ue++9V6/HgBNFtj6cKBIAmr+GvH43qCCdO3dOnTp1uurtCxYs0MKFC+V0OpWcnKydO3fKx8dH999/v2bPnq0hQ4a45cvLy5WYmKjVq1dLunwkaM6cOXr11VfdckeOHNH48eO1a9cuSVLHjh31zjvv6NFHH3XL5eTkaNKkSTp16pQkqU+fPlq/fr369u3rllu8eLHmz5+vqqoqSVJCQoLWrFlT77JDQWp9KEgA0PzdsIJ0o3z11Vc6fPiwIiMj1aNHjzozhmFox44dKioq0sCBAz1++LZWRUWF8vLy1KZNGw0ePNjjBJO1vv32WzmdToWEhCg6OrpB81KQWh8KEgA0f82uIDU3FKTWh4IEAM3fDfstNgAAgNaAggQAAGBBQQIAALCgIAEAAFhQkAAAACwoSAAAABYUJAAAAAsKEgAAgAUFCQAAwIKCBAAAYEFBAgAAsKAgAQAAWFCQAAAALChIAAAAFhQkAAAACwoSAACABQUJAADAgoIEAABgQUECAACwoCABAABYUJAAAAAsKEgAAAAWFCQAAAALChIAAIAFBQkAAMCCggQAAGBBQQIAALCgIAEAAFhQkAAAACwoSAAAABYUJAAAAAsKEgAAgAUFCQAAwIKCBAAAYEFBAgAAsKAgAQAAWFCQAAAALChIAAAAFhQkAAAACwoS
AACABQUJAADAgoIEAABgQUECAACwoCABAABYUJAAAAAsKEgAAAAW11WQMjIy1Lt3b7Vt21bt27fX2LFjdfr0abfMxo0bNWDAAPn7+6t///7asmWLxzoul0tpaWkKDw+Xn5+fHnroIR09etQjd/bsWT377LMKDg6Ww+HQ5MmTVVFR4ZE7cOCA4uPj5XA4FBoaqqVLl9Y5f31mAwAArVeDC1JaWpqmT5+u8PBwLVu2TJMnT9aGDRs0fPhw1dTUSJLWrVunUaNGqbq6WqmpqYqIiNCIESO0b98+t7WSkpKUnJysu+++W2lpaTpz5ozi4uJUXl5uZiorKzVs2DC9++67euyxxzRv3jx98MEHmjRpkttahw8fVkxMjJxOp+bMmaPHH39cs2bN0ttvv+2Wq+9sAACg9bIZhmHUN1xYWKju3bsrMTFRv/rVr8ztc+fO1Wuvvaa8vDxFRUWpZ8+eCggI0N69e2W322UYhmJjY+Xl5aVPPvlEkrRr1y7dd999GjNmjDIzM2Wz2XTu3DmFh4dr5syZeumllyRJixcv1uzZs7Vs2TIlJSVJkrZu3aqhQ4cqNzdXMTExkqSHH35YmzdvltPpVFRUlCRpwYIFWrJkiU6dOqV27dqpvLy8XrNdS2lpqRwOh0pKSmS32+v78DUJW9Kaph6hRTAyHm/qEQAA31NDXr8bdATp/Pnzmjt3rl5++WW37T/84Q8lXS5Q27ZtU2FhoZKSksyd22w2JSYmKjc3V2fOnJEkZWZmSpJSUlJks9kkSYGBgRo7dqyysrLMtTMzMxUYGKjJkyeb22JjYxUREWHmSktLlZOTo/j4eLMcSdKUKVNUVlamnJwcSar3bAAAoHVrUEHq2bOn5s6dq4CAALftO3fulCRFRkZq//79ki6XmCtFR0fL5XJp7969kqT9+/fL4XC4FZra3MGDB3Xx4kUzN2jQILVt29Yjt2fPHknSoUOHVFNT47HP4OBghYSEmLn6zgYAAFq37/0ttrNnz+rdd99VbGyswsLCVFRUJEkKCwtzywUFBUmSjh07JkkqKipSz549zaNHV+ZcLpfy8/NVUVGhqqoqj7Vqc1euVdc+65uzzmZVVVWl0tJStwsAAGi5vndBeu6551RaWqo33nhD0uVvpknyOMrk7+8vSSouLjZz1ow1d7W1anNXrvV9ctbZrFJTU+VwOMxL165d68wBAICW4XsVpBUrVuh3v/udFixYoAEDBki6XDZsNpusn/2uvV5ZWWnm6nJlzs/Pz+MI05W5a61VV64+s1mlpKSopKTEvBQUFFx1fwAAoPm77oK0c+dOJSUlacSIEZo3b565PTQ0VIZheJSIwsJCSTI/HB0aGqr8/HyPda/MeXl5KTg4+Kq5K9eSVK9cfWaz8vHxkd1ud7sAAICW67oK0jfffKNHHnlEYWFhWrt2rdtRntoPXefl5bndx+l0SpK6dOli5goKCnT8+PFr5rZv3+4xg9PpNDPh4eGy2+0e+ywqKtKRI0fc1qrPbAAAoHVrcEE6efKkHnzwQblcLn344YceR1Oio6PVrVs3ZWRkqLq62ty+YsUKeXt7m98gGzVqlCQpPT3dzFy4cEFr165VZGSkOnfuLEkaPXq0jh49qk2bNpm53bt368svv1RcXJwkydvbWyNHjlRWVpZOnDhh5lauXCnDMMxcfWcDAACtW4ML0uOPP66jR49q7Nix+uyzz7R69Wrzsm/fPnl5eWnBggXatWuXxo0bp48//lhTp05Vdna2JkyYoNtvv12S1KtXL02YMEFLlizRokWL9NFHHyk+Pl6nT5/WjBkzzP2NHz9evXv31sSJE7Vq1Spt2LBBo0ePlq+vr9u5kZKTk3Xp0iXFx8crOztb6enpmj9/viIiIjR8+PDL/9h6zgYAAFq3Bp1J+9y5c+rUqdNVb1+wYIEWLlwo6fIZsOfPn6+qqipJUkJCgtasWeN2xKm8vFyJiYlavXq1pMtHgubMmaNXX33Vbd0jR45o/Pjx
2rVrlySpY8eOeuedd/Too4+65XJycjRp0iSdOnVKktSnTx+tX79effv2dcvVZ7bvwpm0Wx/OpA0AzV9DXr8bVJAa6ttvv5XT6VRISIiio6Ovmvvqq690+PBhRUZGqkePHnVmDMPQjh07VFRUpIEDB6pDhw515ioqKpSXl6c2bdpo8ODBHieYbOhsdaEgtT4UJABo/m6ZgtRSUZBaHwoSADR/N+y32AAAAFoDChIAAIAFBQkAAMCCggQAAGBBQQIAALCgIAEAAFhQkAAAACwoSAAAABYUJAAAAAsKEgAAgAUFCQAAwIKCBAAAYEFBAgAAsKAgAQAAWFCQAAAALChIAAAAFhQkAAAACwoSAACABQUJAADAgoIEAABgQUECAACwoCABAABYUJAAAAAsKEgAAAAWFCQAAAALChIAAIAFBQkAAMCCggQAAGBBQQIAALCgIAEAAFhQkAAAACwoSAAAABYUJAAAAAsKEgAAgAUFCQAAwIKCBAAAYEFBAgAAsKAgAQAAWFCQAAAALChIAAAAFhQkAAAACwoSAACABQUJAADAgoIEAABgQUECAACw+F4Fadq0aXrggQc8tu/YsUM2m83j0qFDB7ecy+VSWlqawsPD5efnp4ceekhHjx71WO/s2bN69tlnFRwcLIfDocmTJ6uiosIjd+DAAcXHx8vhcCg0NFRLly6tc+6NGzdqwIAB8vf3V//+/bVly5br+ecDAIAWqs313vH111/Xb37zGw0ZMsTjtp07d+q2227TypUr3bbfdtttbteTkpK0fPlyJSQk6Pnnn9eqVasUFxenffv2qV27dpKkyspKDRs2THv37tW0adPUtWtXvfHGGyopKdHvf/97c63Dhw8rJiZGNptNc+bMUXFxsWbNmiVfX18lJiaauXXr1mncuHHq16+fUlNTlZeXpxEjRmj37t265557rvfhAAAALYjNMAyjIXe4dOmSpk+frvfee0/t27dX3759tW3bNrfM2LFjdfz4cX3++edXXWfXrl267777NGbMGGVmZspms+ncuXMKDw/XzJkz9dJLL0mSFi9erNmzZ2vZsmVKSkqSJG3dulVDhw5Vbm6uYmJiJEkPP/ywNm/eLKfTqaioKEnSggULtGTJEp06dUrt2rVTeXm5evbsqYCAAO3du1d2u12GYSg2NlZeXl765JNP6vUYlJaWyuFwqKSkRHa7vSEP301nS1rT1CO0CEbG4009AgDge2rI63eD32L77LPPlJ2drS1btqhv3751Zj7//HOzuFxNZmamJCklJUU2m02SFBgYqLFjxyorK8stFxgYqMmTJ5vbYmNjFRERYeZKS0uVk5Oj+Ph4sxxJ0pQpU1RWVqacnBxJ0rZt21RYWKikpCTzgbHZbEpMTFRubq7OnDnT0IcDAAC0QA0uSHfddZcOHDigQYMG1Xn7yZMnlZ+fr08//VTdunWTr6+v+vbtq6VLl6qmpsbM7d+/Xw6Hw63QSFJ0dLQOHjyoixcvmrlBgwapbdu2Hrk9e/ZIkg4dOqSamhrFxsa6ZYKDgxUSEmLm9u/fL0keuejoaLlcLu3du7eBjwYAAGiJGvwZpKCgoO+8/dNPP5UkFRUVaerUqWrfvr02bNigmTNn6u9//7t++ctfmrf37NnTPHp05foul0v5+fkKDQ1VVVWVwsLC6pwjNzfXXEvSVXPHjh37zlztv6k2Z1VVVaWqqirzemlp6Xc+BgAAoHlr9K/5d+/eXa+88op2796t5ORkTZs2TVu2bNGwYcOUkZGhkpISSZe/wRYQEOBxf39/f0lScXGxXC6XJF01V1xcbK71fXJX7rMuqampcjgc5qVr167f9RAAAIBmrtEL0n333ae5c+eapUOSvLy89Mwzz6i8vNw8wnTl7Veq/cx4ZWWl/Pz8PI4wXZmrrKz8zrXqytlsNlk/l37lPuuSkpKikpIS81JQUHDV/QEAgObvpp0osvZr+ydPnpQkhYaGKj8/3yNXWFgoSbLb7fLy8lJw
cPBVc7UftA4NDZWkeuUMw/AoOFfusy4+Pj6y2+1uFwAA0HI1ekGaM2eOFi9e7LH9iy++kCTdcccdkqSoqCgVFBTo+PHjbjmn0ylJ6tKli5nbvn27x3pOp9PMhIeHy263Ky8vzy1TVFSkI0eOuK0lySNn3ScAAGjdGr0g5efnKy0tTadPn3bblp6ervbt25tn3h41apQkKT093cxduHBBa9euVWRkpDp37ixJGj16tI4ePapNmzaZud27d+vLL79UXFycJMnb21sjR45UVlaWTpw4YeZWrlwpwzDMXHR0tLp166aMjAxVV1ebuRUrVsjb29vj220AAKB1avSC9OKLL6q8vFz33nuvpk+frl/84heKiorSmTNntHTpUvPtqV69emnChAlasmSJFi1apI8++kjx8fE6ffq0ZsyYYa43fvx49e7dWxMnTtSqVau0YcMGjR49Wr6+vm7nRkpOTtalS5cUHx+v7Oxspaena/78+YqIiNDw4cMv/2O9vLRgwQLt2rVL48aN08cff6ypU6cqOztbEyZM0O23397YDwcAAGiGrvunRq6mf//+ys3NVXJyst599135+Pjo/vvv1+zZsz1+lmT58uUyDEMLFy6UdPlI0IsvvqinnnrKzPj5+Sk7O1vjx483t3fs2FGrV69Wnz59zFzfvn31wQcfaNKkSUpISJAk9enTR+vXr5evr6+Ze/rpp3X27FnNnz/fPNFkQkKC3nrrrcZ+KAAAQDPV4J8auRG++uorHT58WJGRkerRo0edGcMwtGPHDhUVFWngwIEeP3xbq6KiQnl5eWrTpo0GDx7scYLJWt9++62cTqdCQkIUHR3doHn5qZHWh58aAYDmryGv37dEQWpuKEitDwUJAJq/G/pbbAAAAC0dBQkAAMCCggQAAGBBQQIAALCgIAEAAFhQkAAAACwoSAAAABYUJAAAAAsKEgAAgAUFCQAAwIKCBAAAYEFBAgAAsKAgAQAAWFCQAAAALChIAAAAFhQkAAAACwoSAACABQUJAADAgoIEAABgQUECAACwoCABAABYUJAAAAAsKEgAAAAWFCQAAAALChIAAIBFm6YeAEDrYkta09QjtBhGxuNNPQLQYnEECQAAwIKCBAAAYEFBAgAAsKAgAQAAWFCQAAAALChIAAAAFhQkAAAACwoSAACABQUJAADAgoIEAABgQUECAACwoCABAABYUJAAAAAsKEgAAAAWFCQAAAALChIAAIAFBQkAAMCCggQAAGBBQQIAALD4XgVp2rRpeuCBB+q8bePGjRowYID8/f3Vv39/bdmyxSPjcrmUlpam8PBw+fn56aGHHtLRo0c9cmfPntWzzz6r4OBgORwOTZ48WRUVFR65AwcOKD4+Xg6HQ6GhoVq6dOl1zwYAAFqv6y5Ir7/+un7zm9/Uedu6des0atQoVVdXKzU1VRERERoxYoT27dvnlktKSlJycrLuvvtupaWl6cyZM4qLi1N5ebmZqays1LBhw/Tuu+/qscce07x58/TBBx9o0qRJbmsdPnxYMTExcjqdmjNnjh5//HHNmjVLb7/99nXNBgAAWi+bYRhGQ+5w6dIlTZ8+Xe+9957at2+vvn37atu2bebt5eXl6tmzpwICArR3717Z7XYZhqHY2Fh5eXnpk08+kSTt2rVL9913n8aMGaPMzEzZbDadO3dO4eHhmjlzpl566SVJ0uLFizV79mwtW7ZMSUlJkqStW7dq6NChys3NVUxMjCTp4Ycf1ubNm+V0OhUVFSVJWrBggZYsWaJTp06pXbt29Z7tWkpLS+VwOFRSUiK73d6Qh++msyWtaeoRWgQj4/GmHqHF4DnZeHheAg3TkNfvBh9B+uyzz5Sdna0tW7aob9++Hrdv27ZNhYWFSkpKMndus9mUmJio3NxcnTlzRpKUmZkpSUpJSZHNZpMkBQYGauzYscrKyjLXy8zMVGBgoCZPnmxui42NVUREhJkrLS1VTk6O4uPjzXIkSVOmTFFZWZlycnIa
NBsAAGjdGlyQ7rrrLh04cECDBg2q8/b9+/dLulxirhQdHS2Xy6W9e/eaOYfD4VZoanMHDx7UxYsXzdygQYPUtm1bj9yePXskSYcOHVJNTY3HPoODgxUSEmLm6jsbAABo3do09A5BQUHfeXtRUZEkKSwsrM77HTt2zMz17NnTPHp0Zc7lcik/P1+hoaGqqqryWKs2l5ub+537rM1duc/6zGZVVVWlqqoq83ppaWmdOQAA0DI0+tf8XS6XJCkgIMBtu7+/vySpuLjYzFkz1tzV1qrNXbnW98lZZ7NKTU2Vw+EwL127dq0zBwAAWoZGL0j+/v6y2Wyyfva79nplZaWZq8uVOT8/P48jTFfmrrVWXbn6zGaVkpKikpIS81JQUHDV/QEAgOav0QtSaGioDMPwKBGFhYWSZH44OjQ0VPn5+R73vzLn5eWl4ODgq+auXEtSvXL1mc3Kx8dHdrvd7QIAAFquRi9ItR+6zsvLc9vudDolSV26dDFzBQUFOn78+DVz27dv99iP0+k0M+Hh4bLb7R77LCoq0pEjR9zWqs9sAACgdWv0ghQdHa1u3bopIyND1dXV5vYVK1bI29vb/AbZqFGjJEnp6elm5sKFC1q7dq0iIyPVuXNnSdLo0aN19OhRbdq0yczt3r1bX375peLi4iRJ3t7eGjlypLKysnTixAkzt3LlShmGYebqOxsAAGjdGr0geXl5acGCBdq1a5fGjRunjz/+WFOnTlV2drYmTJig22+/XZLUq1cvTZgwQUuWLNGiRYv00UcfKT4+XqdPn9aMGTPM9caPH6/evXtr4sSJWrVqlTZs2KDRo0fL19fX7dxIycnJunTpkuLj45Wdna309HTNnz9fERERGj58eINmAwAArVuDv+ZfH08//bTOnj2r+fPnmydzTEhI0FtvveWWW758uQzD0MKFCyVdPhL04osv6qmnnjIzfn5+ys7O1vjx483tHTt21OrVq9WnTx8z17dvX/MnSBISEiRJffr00fr16+Xr69vg2QAAQOvV4J8aaYhvv/1WTqdTISEhio6Ovmruq6++0uHDhxUZGakePXrUmTEMQzt27FBRUZEGDhyoDh061JmrqKhQXl6e2rRpo8GDB3ucYLKhs9WFnxppffhJh8bDc7Lx8LwEGqYhr983tCC1VBSk1ocXosbDc7Lx8LwEGuaG/hYbAABAS0dBAgAAsKAgAQAAWFCQAAAALChIAAAAFhQkAAAACwoSAACABQUJAADAgoIEAABgQUECAACwoCABAABYUJAAAAAsKEgAAAAWFCQAAAALChIAAIAFBQkAAMCCggQAAGBBQQIAALCgIAEAAFhQkAAAACwoSAAAABYUJAAAAAsKEgAAgAUFCQAAwIKCBAAAYEFBAgAAsKAgAQAAWFCQAAAALChIAAAAFhQkAAAACwoSAACABQUJAADAgoIEAABgQUECAACwoCABAABYUJAAAAAsKEgAAAAWFCQAAAALChIAAIAFBQkAAMCCggQAAGBBQQIAALCgIAEAAFhQkAAAACwoSAAAABY3pCDV1NQoICBANpvN47J3714zt3HjRg0YMED+/v7q37+/tmzZ4rGWy+VSWlqawsPD5efnp4ceekhHjx71yJ09e1bPPvusgoOD5XA4NHnyZFVUVHjkDhw4oPj4eDkcDoWGhmrp0qWN+U8HAAAtQJsbsej+/ftVXl6uRYsWKSwszO227t27S5LWrVuncePGqV+/fkpNTVVeXp5GjBih3bt365577jHzSUlJWr58uRISEvT8889r1apViouL0759+9SuXTtJUmVlpYYNG6a9e/dq2rRp6tq1q9544w2VlJTo97//vbnW4cOHFRMTI5vNpjlz5qi4uFizZs2Sr6+vEhMTb8RDAQAAmqEbUpB27Nghb29vzZo1yywxVyovL9f06dPVo0cPbd++XXa7Xc8995xiY2M1Y8YMffLJJ5KkXbt2afny5RozZowyMzNls9k0YcIEhYeH680339RL
L70kSVq2bJn27NmjZcuWKSkpSZL0ox/9SEOHDtXUqVMVExMjSXrhhRdUVlYmp9OpqKgoSZKfn59mz56tJ554os5ZAQBA63ND3mL7/PPPNWDAgKsWjm3btqmwsFBJSUmy2+2SJJvNpsTEROXm5urMmTOSpMzMTElSSkqKbDabJCkwMFBjx45VVlaWuV5mZqYCAwM1efJkc1tsbKwiIiLMXGlpqXJychQfH2+WI0maMmWKysrKlJOT04iPAAAAaM5uSEHasWOHysrK1K9fP/n6+qpr166aMWOGioqKJF1+C066XGKuFB0dLZfLZX5Oaf/+/XI4HG6FpjZ38OBBXbx40cwNGjRIbdu29cjt2bNHknTo0CHV1NR47DM4OFghISFmDgAAoNHfYissLNTXX3+tTp066ec//7nCwsLkdDr11ltvaefOndqxY4dZlKyfTwoKCpIkHTt2TJJUVFSknj17mkePrsy5XC7l5+crNDRUVVVVHmvV5nJzc8216tpnba52n3WpqqpSVVWVeb20tPQajwIAAGjOGr0gXbp0SS+//LLGjRunXr16SZImT56ssLAwzZs3T1u2bJHL5ZIkBQQEuN3X399fklRcXCzp8jfYrBlrrkuXLnWuVZu7cq365OqSmpqqRYsWXfV2AADQsjT6W2whISGaP3++WY5q/eIXv5Akbd68Wf7+/rLZbDIMwy1Te72yslLS/y9CVlfm/Pz8PI4wXZm71lrWXF1SUlJUUlJiXgoKCq6aBQAAzd9NO1Fk7Qe2T548qdDQUBmG4VE0CgsLJcn84HZoaKjy8/M91roy5+XlpeDg4KvmrlxL0jVzdfHx8ZHdbne7AACAlqvRC9Lq1av15JNPemz/4osvJEl33HGH+aHrvLw8t4zT6ZQk822zqKgoFRQU6Pjx49fMbd++3WOfTqfTzISHh8tut3vss6ioSEeOHDFzAAAAjV6QysvL9f7775vnMpIuvxU2d+5cSdLIkSMVHR2tbt26KSMjQ9XV1WZuxYoV8vb2Nr9pNmrUKElSenq6mblw4YLWrl2ryMhIde7cWZI0evRoHT16VJs2bTJzu3fv1pdffqm4uDhJkre3t0aOHKmsrCydOHHCzK1cuVKGYZg5AACARi9ITzzxhO6880498sgj+vnPf64ZM2bonnvuUW5uriZOnKgHHnhAXl5eWrBggXbt2qVx48bp448/1tSpU5Wdna0JEybo9ttvlyT16tVLEyZM0JIlS7Ro0SJ99NFHio+P1+nTpzVjxgxzn+PHj1fv3r01ceJErVq1Shs2bNDo0aPl6+vrdm6k5ORkXbp0SfHx8crOzlZ6errmz5+viIgIDR8+vLEfCgAA0Ew1+rfY/P39tW3bNiUnJ+uDDz5QVVWV+vbtqxUrVujpp582c08//bTOnj2r+fPnmydzTEhI0FtvveW23vLly2UYhhYuXCjp8pGgF198UU899ZSZ8fPzU3Z2tsaPH29u79ixo1avXq0+ffqYub59++qDDz7QpEmTlJCQIEnq06eP1q9fL19f38Z+KAAAQDNlM6xfJbvJvv32WzmdToWEhCg6Ovqqua+++kqHDx9WZGSkevToUWfGMAzzPEsDBw5Uhw4d6sxVVFQoLy9Pbdq00eDBgz1OMHktpaWlcjgcKikpueU/sG1LWtPUI7QIRsbjTT1Ci8FzsvHwvAQapiGv3zfkt9gaIigoSP/0T/90zdxdd92lu+666zszNptNP/nJT665lp+fn4YNG1bvGQEAQOty077mDwAA0FxQkAAAACwoSAAAABYUJAAAAAsKEgAAgAUFCQAAwIKCBAAAYEFBAgAAsKAgAQAAWFCQAAAALChIAAAAFhQkAAAACwoSAACABQUJAADAgoIEAABgQUECAACwoCABAABYUJAAAAAsKEgAAAAWFCQAAAALChIAAIAFBQkAAMCCggQAAGBBQQIAALCgIAEAAFhQkAAAACwoSAAAABYUJAAAAAsKEgAAgAUFCQAAwIKC
BAAAYEFBAgAAsKAgAQAAWFCQAAAALChIAAAAFhQkAAAACwoSAACABQUJAADAgoIEAABg0aapBwAAoCnZktY09QgthpHxeFOP0Gg4ggQAAGBBQQIAALCgIAEAAFhQkAAAACwoSAAAABYUJAAAAItWWZAOHDig+Ph4ORwOhYaGaunSpU09EgAAuIW0uvMgHT58WDExMbLZbJozZ46Ki4s1a9Ys+fr6KjExsanHAwAAt4BWV5BeeOEFlZWVyel0KioqSpLk5+en2bNn64knnlC7du2aeEIAANDUWtVbbKWlpcrJyVF8fLxZjiRpypQpKisrU05OThNOBwAAbhWtqiAdOnRINTU1io2NddseHByskJAQ7dmzp4kmAwAAt5JW9RZbUVGRJCksLMzjtqCgIB07dqzO+1VVVamqqsq8XlJSIunyEalb3sULTT1Bi9As/n/dXPCcbDQ8LxsJz8lGc6s/J2vnMwzjmtlWVZBcLpckKSAgwOM2f39/FRcX13m/1NRULVq0yGN7165dG3U+3Loc7/yiqUcAPPC8xK2muTwny8rK5HA4vjPTqgqSv7//VW8zDEOVlZV13paSkqIXXnjBvO5yuXTu3Dl16tRJNput0edsTUpLS9W1a1cVFBTIbrc39TgAz0ncknheNg7DMFRWVqaQkJBrZltVQQoNDZUk5efne9xWWFiofv361Xk/Hx8f+fj4uG3r0KFDo8/Xmtntdv6jxy2F5yRuRTwvv79rHTmq1ao+pB0eHi673a68vDy37UVFRTpy5Ii6dOnSRJMBAIBbSasqSN7e3ho5cqSysrJ04sQJc/vKlStlGIbi4uKacDoAAHCraFUFSZKSk5N16dIlxcfHKzs7W+np6Zo/f74iIiI0fPjwph6v1fHx8dGCBQs83sIEmgrPSdyKeF7efDajPt91a2FycnI0adIknTp1SpLUp08frV+/Xn379m3iyQAAwK2gVRYkSaqoqFBeXp7atGmjwYMHq23btk09EgAAuEW02oIEAABwNa3uM0gAAADXQkECAACwaFUnisStpbq6WqWlpaqsrJTdbq/zJ2AAoDXj72TT4QgSbqrVq1frpz/9qbp16yZfX1917txZXbt2lcPhkN1u16OPPqrc3NymHhOtWHV1tc6dO6eTJ0/q/PnzTT0OWiH+Tt4a+JA2boqzZ8/qkUce0c6dO/Xggw8qOjpaQUFB8vf3l2EYKiws1M6dO5WTk6NLly4pIyNDiYmJTT02WonVq1crKytLe/bs0cmTJ91+6btdu3aKi4vTc889pyFDhjThlGjp+Dt5a6Eg4aYYO3asvvjiC/3pT39Sr169rpo7ceKEhg8frm+++UZHjx7VHXfccROnRGvDCxJuJfydvLVQkHBTBAQE6LXXXtNzzz13zeyf/vQnJSQkaNOmTUpISLgJ06G14gUJtxL+Tt5a+AwSborAwEB98cUX9coePXpUNptNXbt2vcFTobXLzs5WUlLSd5YjSQoNDVVaWpouXLggp9N5k6ZDa8PfyVsLBQk3xc9+9jP953/+p+bMmaOzZ8/WmamsrFRGRob+9V//VQ899JAiIyNv8pRobXhBwq2Ev5O3Ft5iw01RWVmpRx99VH/+85/l7e2tfv36KSwszPzKan5+vvbs2aOysjLFxMQoKytLt99+exNPjZZu9uzZevPNN/Uv//Ivmj17tjp16uSRqays1IoVK/Sv//qvio2N1Z/+9KcmmBStAX8nby0UJNxU69evV2Zmpg4cOKCioiK5XC75+/srNDRUUVFRGj16tIYOHdrUY6KV4AUJtyL+Tt4aKEgAWj1ekABYUZAAAAAs+JA2AACABQUJAADAgoIEAABgQUECAACwoCABAABYUJBwy9mwYQO/dQUA34G/kzceBQm3HIfDob59+zb1GIAbXpBwK+Hv5I3HeZAAoB62bNmiV199VVu3bm3qUQDcBBQk3FQVFRXKycnRnj17dOzYMRUXF6uyslJ2u11dunTRsGHD9NBD
D+m2225r6lEB4KYzDEM7duxQYWGhIiMj1aNHjzpz+/fv1x//+Ee99NJLN3fAVoSChJvmP/7jPzR79myVlpYqJCREQUFB8vf3l2EYKiws1JEjRyRJd911l7KystSnT58mnhgAbp7CwkLFx8friy++MLcNHTpUb7zxhiIjI92ya9as0ZNPPqmampqbPWarwWeQcFOkp6dr6tSpmjFjhk6dOqW//e1v+utf/6q8vDxt375dhw8f1tmzZ5WWlqZvvvlGjz32GP/hA2hVpk+fri+//FIpKSlat26dZs6cqT179ujee+/Va6+91tTjtTocQcJN8YMf/EATJkzQL3/5y2tm09PT9cILL+jjjz/WAw88cOOHA4BbgN1uV2Jiol5//XVz27lz5zR9+nStXbtWCQkJ+t3vfqeAgACOIN0EbZp6ALQO58+fV8eOHeuVvfPOOyVJ1dXVN3IkQLGxsbLZbPXO22w2ffzxxzdwIrRmlZWV6tWrl9u2wMBArVmzRrGxsZo+fbruv/9+/dd//VcTTdi68BYbbor7779fb7zxhvbt2/eduVOnTiklJUVdunTR4MGDb9J0aK3uvvtubdu2TQcPHpRhGNe8uFyuph4ZLVjPnj21adOmOm975plntG3bNhUWFurHP/6xnE7nTZ6u9eEtNtwUBw8e1ODBg1VeXq4RI0bogQceUFhYmAICAiRJ+fn5ysvLU1ZWli5evKiNGzcqLi6uiadGa/D6669r4cKF2rJli37yk5809ThoxTIyMvTcc89p1qxZWrRokfz9/T0y33zzjR588EEdO3ZMkniL7QaiIOGm+d///V/NnDlTmzdvNv+jrn17wzAM2e12jRw5UsnJybr77rubclS0MomJifrzn/+sAwcOqH379k09DlqxtLQ0vfzyy5o3b55SUlLqzJw6dUrjx4/XX/7yFwrSDURBwk1XVlamQ4cOqaioSC6XS/7+/goNDVV4eLi8vHjXFzdfdXW1PvvsM/Xp00edO3du6nHQyp05c0bnz5+/6jmQahUVFdX7s51oOAoSAACABf9zHQAAwIKCBAAAYEFBAgAAsKAgAQAAWFCQAKCetm3bJpvNpm3btjX1KABuMAoSAACABQUJAADAgoIEAABgQUEC0Gxs2LBB/fv3V7t27XTvvfdq1apVGjRokLy9vSVdPiP2q6++ql69esnPz09333233nrrLbcfmX3qqafUo0cPFRcX68knn1RgYKA6d+6sqVOn6uLFi2bO5XLplVde0Q9+8AP5+/trxIgROnHihMdM9dnnlfuVpPXr12vQoEFyOBw34FEC0BgoSACahd27d2vs2LEKDw/X0qVLFRoaqqeeekoJCQl6++23JUk/+9nP9Nprr+mf//mftWzZMt1///16/vnnNW/ePLe1Ll26pPj4eJ04cUKpqam6//77tXz5ci1btszMzJ49W/Pnz9eAAQO0ePFi2Ww2TZkyxWOu+u6z1iuvvKLx48erY8eOGjNmTCM+QgAalQEAzcCbb75pSDJKS0sNwzCM0tJSQ5KRlpZmGIZhfPLJJ4Yk49e//rVRWFhoXmJjY4327dsb1dXVhmEYxsSJEw1JxvDhw42amhrDMAzj4sWLRlBQkPHwww8bhmEYhYWFRtu2bY0hQ4YYLpfLMAzDqKmpMQYOHGhIMrZu3dqgfdbu18fHxwgKCjIOHjx4Ux4zANePI0gAmoXo6GhJ0nvvvae//e1veu+99yRJ4eHhkqTNmzdLkqZNm6bOnTubl61bt6qsrEzHjx93W2/x4sXmjyO3bdtWvXv31vnz5yVJTqdTly5d0mOPPSabzSZJ8vLy0qRJk9zWaOg+q6qqlJGRobvvvruxHhYAN0ibph4AAOrjnnvuUWhoqGbOnKnnnntONptNkydP1ujRoyVd/gV0SfrjH/+ogIAAj/vfcccd5v8dEBCgfv36ud1eW5Yk6dy5c5Kk4OBgt0xoaKjb9Ybss/b6o48++t3/UAC3BAoSgGbh+eef1+DB
g5Wenq6vv/5aPXv2VEhIiHl7p06dJEkRERHq06ePuf3vf/+7jh07JsMwPLJXU/vh6doCVOvbb791u96QfUqSn5+feUQKwK2Nt9gANAtff/21fHx8dMcdd2jgwIFu5UiShg0bJkl6//333bY///zzGjRokKqrq+u9r3vvvVfe3t7asGGD23br2o25TwC3Fo4gAWgWfvzjHysjI0N2u139+/eX3W5XYGCgBgwYoE6dOmno0KF69NFHlZqaqvz8fMXExGj37t3KzMzUrFmz1LFjx3rvKzg4WM8++6zefvttjR49Wg8++KBycnL06aefuuUac58AbjFN/SlxAKiP3Nxc47bbbjNCQkIMHx8fQ5IhyWjbtq3x/vvvG4ZhGJcuXTL+7d/+zQgLCzN8fHyMiIgIY8mSJR7fJuvevbvH+kOGDDGGDBliXq+urjbmz59vdOnSxfDx8TEeeOAB45133nH7Flt99/ld+wVwa7IZhuVNcgC4xRw5ckQREREaP368hgwZIh8fH9XU1KigoECLFy9WfHy81q1b19RjAmhBKEgAbnkul0uvvvqqMjMzdezYMZWXl8vf31/du3fXP/7jP+rFF1/0+MYZAHwfFCQAAAALvsUGAABgQUECAACwoCABAABYUJAAAAAsKEgAAAAWFCQAAAALChIAAIAFBQkAAMCCggQAAGDx/wAKpF9qxCXmdAAAAABJRU5ErkJggg=="
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 用户性别分布\n",
    "tags = data_user_info.gender.value_counts().sort_index()\n",
    "tags.plot.bar()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:27.172710300Z",
     "start_time": "2024-09-24T14:04:27.093350300Z"
    }
   },
   "id": "75d804a3b3f29076",
   "execution_count": 15
  },
  {
   "cell_type": "markdown",
   "source": [
    "- 年龄段（`age_range`）编码：1 表示 18 岁以下，2 表示 18-24 岁，3 表示 25-29 岁，4 表示 30-34 岁，5 表示 35-39 岁，6 表示 40-49 岁，7 和 8 均表示 50 岁及以上，0 表示未知。\n",
    "- 性别（`gender`）编码：0 表示女性，1 表示男性，2 表示未知。\n",
    "- 从上面两张图可以看出，用户主要集中在 25-29 岁，且女性较多。\n",
    "- 出于隐私保护，数据采样存在一定偏差，统计结果并不代表天猫的实际情况。"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "dfbc1130779b42c9"
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "<Axes: xlabel='action_type'>"
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": "<Figure size 640x480 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhcAAAHACAYAAAARCkpCAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAhoUlEQVR4nO3df3zP9f7/8ft7pjG2YeZnC/P7rCKdOuknUX5VUnGcImqVOrWiqJQaJxeaKGpHx8kpEg4Jl1LSceSSmrgc9EPKQUjm1MbsvYX32J7fP/p4f3u3yd7zmPfec7teLu8/3q/38/1+Pbb3yW7n9X6932+Pc84JAADASESoBwAAAFULcQEAAEwRFwAAwBRxAQAATBEXAADAFHEBAABMERcAAMAUcQEAAEwRFwAAwFSliYv7779fXbp0Kdd9hw4dKo/Hc8LL2LFjTWcFAAAnFhnqASRp0qRJmj59uq666qpy3X/YsGHq3r17ie2bN29Wenq62rRpc6ojAgCAMvKE8rtFjh49qtTUVM2aNUsxMTFKTk7W6tWrzR6/Z8+e+uGHH7RhwwZFRFSagzQAAFRpIf2Lm5mZqXfffVcrV65UcnKy6WOvWrVKK1as0OTJkwkLAABOo5D+1W3Xrp02b96syy+//DfX/e1vf9O5556rGjVqqHXr1nr66afl8/l+8z5PPPGEunXrpm7dulmODAAATiKk51w0bNjwpGtGjhypqVOn6r777tOIESO0ZcsWTZgwQdu3b9e8efNKvc9HH32kdevWafny5dYjAwCAkwjpORe/dPydIr8852LHjh1q06aNRo0apZEjR/q3jxw5Uq+//rqysrLUqFGjEo91/fXXa+fOndq8eXNFjw0AAH6lUrxb5ERWrlyp4uJipaenKz09vcTtW7ZsKREXO3fu1LvvvquMjIzTNSYAAPiFSh0XOTk5kn4+56Jly5Ylbj///PNLbJszZ46qVaum/v37V/h8AACgpEodF/Hx8ZKkc845J+BzLHJzc7V169ZS7/PGG2+oe/fuSkhIOC0zAgCAQJX6PZrdu3dXRESE5syZE7D92WefVefOnbVv376A7Z9//rm2bdumvn37ns4xAQDAL1TqIxetWrXSiBEjNGXKFHm9XvXp00c7duzQtGnTdPPNN+u8884LWL9ixQpJKvcnfQIAgFNXqd8tctzLL7+sjIwMbd++XU2bNtXgwYP1+OOPq2bNmgHrunXrpi+//FI//vjjaZgYAACUptLEBQAAqBoq9TkXAAAg/BAXAADA1Gk/obO4uFhZWVmKiYmRx+M53bsHAADl4JxTfn6+mjRpctIvBD3tcZGVlaXExMTTvVsAAGBgz549Ovvss39zTVBxUVRUpLi4OP30008lbtu0aZM6dux40seIiYnxDxcbGxvM7gEAQIh4vV4lJib6/47/lqDi4ssvv9RPP/2kcePGKSkpKeC2Zs2alekxjr8UEhsbS1wAABBmynJKQ1BxsXbtWlWrVk2PPPKIatWqVe7BAABA1RXUu0U+/fRTderUibAAAAAnFFRcrF27Vvn5+Tr33HNVo0YNJSYmavjw4crNza2o+QAAQJgp88si2dnZ2rZtm+Lj45WSkqKkpCStX79eL774otatW6e1a9eWej+fzyefz+e/7vV6T31qAABQaZU5Lo4ePaq//OUv+tOf/qRWrVpJkoYNG6akpCSNGTNGK1euDPha9OMmTpyocePG2U0MAAAqtVP+bpHs7Gw1aNBAo0aN0qRJk0rcXtqRi8TEROXl5fFuEQAAwoTX61VcXFyZ/n6f8odoHT+5Mysrq9Tbo6KiFBUVdaq7AQAAYaLMJ3S+8cYbuv3220ts37RpkySpQYMGdlMBAICwVea4+OmnnzRnzhytWrXKv+3IkSN68sknJUk33HCD/XQAACDslPmci0OHDqljx47at2+fBgwYoJiYGL333nvatm2bhgwZolmzZpVph8G8ZgMAACqHCjnn
Ijo6WqtXr9bjjz+upUuXyufzKTk5WTNnztSdd955ykMDAICq4ZTfLRIsjlwAABB+gvn7HdQndAIAAJwMcQEAAEwRFwAAwNQpf4hWuPE8MDfUI4SEy7gt1CMAAM4QHLkAAACmiAsAAGCKuAAAAKaICwAAYIq4AAAApogLAABgirgAAACmiAsAAGCKuAAAAKaICwAAYIq4AAAApogLAABgirgAAACmiAsAAGCKuAAAAKaICwAAYIq4AAAApogLAABgirgAAACmiAsAAGCKuAAAAKaICwAAYIq4AAAApogLAABgirgAAACmiAsAAGCKuAAAAKaICwAAYIq4AAAApogLAABgirgAAACmiAsAAGCKuAAAAKaICwAAYIq4AAAApogLAABgirgAAACmiAsAAGCKuAAAAKaICwAAYIq4AAAApogLAABgirgAAACmiAsAAGCKuAAAAKaICwAAYIq4AAAApogLAABgirgAAACmiAsAAGCKuAAAAKaICwAAYIq4AAAApogLAABg6pTioqCgQK1atdLQoUONxgEAAOHulOJixIgR2rFjh9UsAACgCih3XCxbtkwzZ86Ux+OxnAcAAIS5csVFdna27rrrLt1www0655xzrGcCAABhrFxxcc8998jj8WjmzJnW8wAAgDAXGewdXnvtNS1dulTLly9XQkJCRcwEAADCWFBxsXv3bg0fPlypqanq2bNnme7j8/nk8/n8171eb3ATAgCAsFLml0WKi4t1++23KzExUZMmTSrzDiZOnKi4uDj/JTExsVyDAgCA8FDmuHj++ef1ySefaNq0aSooKFBOTo5ycnJUXFwsn8+nnJycgCMUx40ePVp5eXn+y549e0x/AAAAULmUOS6WLVumoqIide/eXQkJCf7Lnj179M9//lMJCQmaP39+iftFRUUpNjY24AIAAKquMp9zMWXKFOXm5pbYPmjQIHXo0EGjRo1ScnKy6XAAACD8lDkuLrzwwlK316hRQ40bN1b37t3NhgIAAOGLLy4DAACmgv6ci1/btWuXwRgAAKCq4MgFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPljovi4mLt2LFDu3fvtpwHAACEuXLFxbx589SoUSO1atVKzZs3V5s2bbRmzRrr2QAAQBgKOi4+/PBDDRo0SAMHDtSGDRu0bNkyHTt2TDfeeKNyc3MrYkYAABBGIoO9w2OPPaaePXvqxRdf9G8rKipS3759tWbNGt1www2mAwIAgPASVFwUFxdr/PjxatGiRcD2qKgoSVK1atXsJgMAAGEpqLiIiIjQtddeG7CtsLBQU6dOVZ06dXTZZZeZDgcAAMJP0C+LHJefn6/x48dryZIlys7O1ptvvqk6deqUWOfz+eTz+fzXvV5veXcJAADCQLnfilpYWKhNmzZp7969ioqK0uHDh0tdN3HiRMXFxfkviYmJ5R4WAABUfuWOi/j4eH3wwQfavXu3OnTooFtuuUVbtmwpsW706NHKy8vzX/bs2XNKAwMAgMrtlD+hs379+vrrX/+qwsJCLV68uMTtUVFRio2NDbgAAICqK6i4OHDggMaMGaOvv/46YHv9+vUlSUePHrWbDAAAhKWg4iI2NlYz
ZszQI488Iuecf/vxIxaXX3657XQAACDsBPVukcjISKWnpyslJUVdunTRgAEDtHfvXj3//PO67rrr1K1bt4qaEwAAhImgz7m48847tXDhQhUUFOjRRx/Vm2++qSeeeEILFy5URARfsgoAwJmuXJ9z0b9/f/Xv3996FgAAUAVwqAEAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAqXLFRUZGhtq0aaPq1asrJiZGf/zjH/Xjjz9azwYAAMJQ0HGRnp6u1NRUtWzZUi+99JKGDRumt956Sz179lRRUVFFzAgAAMJIZDCLs7OzNW7cOI0YMULPP/+8f3tUVJQmTJigtWvX6vLLLzcfEgAAhI+g4qKgoEBPPvmkHnrooYDtHTt2lPRzfAAAgDNbUHHRokULPfnkkyW2r1u3TpLUoUMHm6kAAEDYOuV3i+zfv1+vvvqqunbtqqSkJIuZAABAGAvqyEVpHnzwQXm9Xk2ePLnU230+n3w+n/+61+s91V0CAIBK7JSOXMycOVPz5s1TWlqaOnXqVOqaiRMnKi4uzn9JTEw8lV0CAIBKzuOcc+W547p163TVVVepW7duWrZsmTweT6nrSjtykZiYqLy8PMXGxpZv6lPgeWDuad9nZeAybgv1CACAMOb1ehUXF1emv9/lellk586d6tu3r5KSkjR//vwThoX089tUo6KiyrMbAAAQhoJ+WSQrK0vdu3dXcXGxli1bFpKjDwAAoPIK+sjFbbfdpm+//VYPPPCAMjMzlZmZ6b/t/PPP1/nnn286IAAACC9BxcWBAwe0evVqST9/v0hGRkbA7WlpacQFAABnuKDiol69eirn+Z8AAOAMwVeuAwAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFPEBQAAMEVcAAAAU8QFAAAwRVwAAABTxAUAADBFXAAAAFOnFBf333+/unTpYjQKAACoCsodF5MmTdL06dMtZwEAAFVAZLB3OHr0qFJTUzVr1izVr1+/ImYCAABhLOgjF5mZmXr33Xe1cuVKJScnV8RMAAAgjAV95KJdu3bavHmz4uLiKmIeAAAQ5oKOi4YNGwa13ufzyefz+a97vd5gdwkAAMJIhb8VdeLEiYqLi/NfEhMTK3qXAAAghCo8LkaPHq28vDz/Zc+ePRW9SwAAEEJBvywSrKioKEVFRVX0bgAAQCXBJ3QCAABTxAUAADBFXAAAAFPEBQAAMHVKJ3SuXr3aaAwAAFBVcOQCAACYIi4AAIAp4gIAAJgiLgAA
gCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmCIuAACAKeICAACYIi4AAIAp4gIAAJgiLgAAgCniAgAAmIoM9QAAYMXzwNxQjxASLuO2UI8ABODIBQAAMMWRCwBAWOJIVeXFkQsAAGCKuAAAAKaICwAAYIq4AAAApogLAABgirgAAACmiAsAAGCKuAAAAKaICwAAYIq4AAAApogLAABgirgAAACmiAsAAGCKuAAAAKaICwAAYKpccbF582b16tVLcXFxatq0qaZOnWo8FgAACFeRwd5h69atuvLKK+XxePTYY4/p4MGDeuSRR1SjRg3de++9FTEjAAAII0HHxcMPP6z8/HytX79eF1xwgSSpZs2aevTRRzV48GDVqlXLfEgAABA+gnpZxOv1asWKFerVq5c/LCTpvvvuU35+vlasWGE+IAAACC9BxcWWLVtUVFSkrl27Bmxv1KiRmjRpoo0bN5oOBwAAwk9QL4vk5uZKkpKSkkrc1rBhQ+3atavEdp/PJ5/P57+el5cn6eejICFReCg0+w2xkP2+Qyxu5IJQjxASeZP/GOoRQoP/vs8sPN8h2a9z7qRrg4qL4uJiSVLt2rVL3BYdHa2DBw+W2D5x4kSNGzeuxPbExMRgdo1TFPfKPaEeAacRz/eZhef7zBLq5zs/P19xcXG/uSaouIiOjj7hbc45HTlypMT20aNH6+GHH/ZfLy4u1oEDBxQfHy+PxxPM7sOa1+tVYmKi9uzZo9jY2FCPgwrG831m4fk+s5ypz7dzTvn5+WrSpMlJ1wYVF02bNpUkfffddyVuy87O1rnnnltie1RUlKKiogK21alTJ5jdVimxsbFn1P8Yz3Q832cWnu8zy5n4fJ/siMVxQZ3Q2bJlS8XGxmrNmjUB23Nzc7V9+3Y1btw4mIcDAABVUFBxUa1aNd1www1atGiR9u7d69/+j3/8Q845XXPNNeYDAgCA8BL0x38//vjjOnr0qHr16qV3331X06ZN01NPPaW2bduqZ8+eFTFjlRAVFaW0tLQSLxGhauL5PrPwfJ9ZeL5PzuPK8p6SX1mxYoXuuOMO7du3T5LUvn17vfnmm0pOTjYfEAAAhJdyxYUkHT58WGvWrFFkZKSuuOIKVa9e3Xo2AAAQhsodFwAAAKUp11euAwAAnAhxAQAATAX9lesom8OHD2vFihXauHGjdu3apYMHD+rIkSOKjY1V48aNde2116pHjx4666yzQj0qgCA457R27VplZ2erQ4cOat68eanrvvzySy1ZskRPP/306R0QpvLy8rRmzRpVq1ZNl112mf9Ds7755hstXrxYR48e1RVXXKGrr746xJNWLpxzUQH+/ve/69FHH5XX61WTJk3UsGFDRUdHyzmn7Oxsbd++XZLUrl07LVq0SO3btw/xxADKIjs7W7169dKmTZv8266++mpNnjxZHTp0CFg7d+5c3X777SoqKjrdY8LIxx9/rH79+unAgQNyzqlu3bpatGiRDh06pJtvvlmFhYWSJI/Ho+uvv14LFizg7an/h5dFjE2bNk1//vOfNXz4cO3bt0/ff/+9NmzYoDVr1ujjjz/W1q1btX//fqWnp2vnzp0aMGAA//gAYSI1NVWff/65Ro8erQULFmjEiBHauHGjLrroIk2YMCHU48FYSkqKYmNjtXTpUi1fvlwXX3yxbrnlFt19993q0qWLtm3bptzcXL3yyitasWKFJk+eHOqRKw8HU02bNnWPPfZYmdZOnTrVRUREuA8//LBihwJgIiYmxo0aNSpg2/79+92tt97qPB6Pu/76611+fr5zzrk33njD
RUREhGJMGNi9e7fzeDzuvffe828rLCx0rVu3drVr13b79+8PWD969GjXpk2b0z1mpcU5F8YKCgpUt27dMq1t3bq1JOnYsWMVORIqWNeuXYP6hl+Px6N///vfFTgRKsqRI0fUqlWrgG316tXT3Llz1bVrV6Wmpqpz58565513QjQhrBz/d/mX35lVvXp19evXT5999pnq1asXsP7ss89WVlbWaZ2xMuNlEWOdO3fW5MmT9cUXX/zmun379mn06NFq3LixrrjiitM0HSrC7373O61evVpfffWVnHMnvRQXF4d6ZJRTixYt9Pbbb5d621133aXVq1crOztbl1xyidavX3+ap4OlFi1aKCEhQbNnzw7Y/tRTT2n+/Pkl1i9cuFDt2rU7XeNVfiE7ZlJFbd682dWtW9edddZZ7sYbb3RTp051b7/9tlu1apVbtWqVmzVrlktJSXFxcXGuZs2a7oMPPgj1yDCQnp7uatas6T755JNQj4IK9NJLLzmPx+NGjhzpfvrpp1LXfPvtty4pKclFRETwskiYmzt3rouIiHDXXXed2759e6lrvv76a9enTx8XERHhFi1adJonrLyIiwqwdetW17t3bxcZGek8Ho/zeDz+f2g8Ho+Li4tzgwcPdl999VWoR4WhYcOGuXPOOcd5vd5Qj4IK9Oyzz7ro6Gg3YcKEE67JyspyXbp0IS6qgFWrVrmLL77Y/etf/yr19jFjxriEhAT36quvnubJKjfeilqB8vPztWXLFuXm5qq4uFjR0dFq2rSpWrZsqYgIXpGqao4dO6bMzEy1b99eCQkJoR4HFSgnJ0cFBQUn/IyL43Jzc8t8DhYqN+dcqedW5ebmKjY2VtWqVQvBVJUXcQEAAEzxf58BAIAp4gIAAJgiLgAAgCniAgAAmCIugDNc8+bNNXTo0FCPAaAKIS6AKuizzz7T2LFjdfDgwZOunTp1qoYNG1bxQwVp7NixWr16dajHAFAOvBUVqIJmzZqlO+64Qzt37jzpZzFUVh6PR2lpaRo7dmyoRwEQJI5cAAAAU8QFUEksXbpUf/jDH1S7dm01btxYt912m/bt2+e/vbi4WM8995xat26tmjVrqm3btho/frx8Pp9/jcfjkcfj0R133CHp5y9fOr7tRC8xnOyci9dff13nnXeeoqKilJSUpGeffTbgy9fGjh0rj8cjn8+n4cOHq0GDBqpTp44GDhyovLy8oH4HQ4cO9c8rSePGjfNf/+WMvXr1Uv369VVYWBhw/xEjRigyMlLff/+9/2e78cYbNWbMGDVo0EDR0dHq2bOnvvnmmxL7XrRokX7/+9+rZs2aatasmVJTU4OeH8D/CeFHjwP4P8uXL3cej8ddeeWVbsaMGS49Pd0lJCS4rl27+tcMHjzYSXIDBgxwM2bMcMOGDXOS3J133ulfM2fOHDdnzhx3zz33OEnuhRde8G/73//+V+q+mzVr5oYMGVLqbePHj3eSXLdu3VxGRoYbOHCgk+QGDRrkX5OWluYkuZtvvtl16tTJTZ8+3Q0ZMsRJcg888EBQv4fMzEz/vJJcv379/NczMzP96xYtWuQkucWLF/u3HTt2zDVq1Mj17t074GerUaOGa9iwoXvuuedcWlqai4mJcQkJCQG/j5deeslJcoMHD3avvPKKGzNmjKtVq5a79NJLg5ofwM+IC6ASeOGFF1y/fv0CvvQsPT3deTwel5+f7z766CMnyd13330B97vllltctWrVXEFBQcD21157zUlyO3fuPOm+TxQX3333nYuMjHTXXHONKy4u9m9PTU11ktzKlSudc/8/Ljp06OAOHTrkX9epUyeXnJxclh+/VJJcWlpaqbcVFha6Bg0auL59+/q3vf/++06SW7JkScDP5vF43KZNm/zbFi9eHPDYXq/X1a5d2916660uOzvbfxkzZoyT5NatW1funwE4U/GyCFAJDB8+XIsXL1ZBQYHeeustjRo1Si+88IKcc8rJydHy5cslSffee2/A/RYs
WKAjR46oVq1a5jO9//77OnbsmO6///6AL2xKTU2VJC1btixg/fjx41WzZk3/9eTkZBUUFJjPJUnVq1fX4MGD9d577yk7O1uSNGfOHDVq1EjXXXddwNoOHTqoY8eO/ut9+/bVWWedpU8//VSSlJmZqYKCAs2bN08JCQn+y/jx4yVJmzdvrpCfAajKIkM9AICf/4Ddfffd+vTTTxUbG6sLL7xQF110kd555x1J8v8BbdKkScD9IiIiKuwbdn/88UdJUtOmTQO2n3322ZKkH374IWD7JZdcUmK2inTXXXdpypQpmj9/vlJSUrR06VKlpqYqMjLwn7VGjRqVmKtevXr+8ylycnIk/XzuyGWXXVZiP23btq2gnwCoujhyAVQC/fr1U1ZWltavX6+DBw9q1apVuummm/y3169fX5KUlZUVcL9169Zp6NChWrdunflMx782fu/evQHbj19v0KBBwPbjM54u7dq102WXXabZs2dr8eLFOnTokFJSUkqs+/XvrKioSPv37/d/FXp8fLykn3+e7t27+y+XXnqpateuXeGRBFRF/FcDhFhOTo62b9+uXr166aKLLvK/82LOnDn+Nb169ZIkzZgxI+C+Cxcu1OzZs3XWWWcFbD/+h97r9ZZ7rh49eigyMlIvv/yy3C8+DicjI0OS1KdPn3I/dlnEx8efdP6UlBRt3LhRzzzzjK666iq1atWqxJovvvhCGzZs8F9fsmSJjh49qssvv1yS/BExb968gHfBzJ49W507d9bGjRuNfiLgzMHLIkCIxcfHq0mTJlqwYIHatm2rY8eOadasWdq2bZsk6fDhw7ryyis1aNAgTZ8+XTk5Obr66qv1+eef65VXXtFNN92kCy64IOAxL7nkEtWtW1cPPfSQBg0aJK/Xq//+9796+eWXyzxXs2bN9NRTTyktLU09evTQjTfeqE8++UTz5s3TwIEDdc0115j+Hn6td+/emj17ts455xzVrl1bmZmZGjVqlNq3b+9fM2DAAD300EPatm3bCT9sKyYmRn369NHIkSNVUFCgKVOmqEmTJv5PJY2NjdWECRP04IMP6sorr9TAgQOVk5OjKVOmqHPnzurRo0eF/pxAlRTqM0oBOPef//zHXXHFFS46Oto1aNDA3XPPPW7hwoVOknvuueecc84VFRW59PR017JlS1ejRg3Xvn17l56e7goLC0t9zDVr1riLL77Y1ahRw8XExLiUlJRS1/3WW1Gdc+7VV191ycnJrnr16q5Zs2Zu/Pjx7tixY/7bj79b5NeGDBnimjVrVvZfwq8cOHDADRkyxMXHx7vq1au7c8891+3atavEuv79+7u6deu6w4cPl7itWbNmrm/fvm7ixImuYcOGrmbNmq53795u+/btJdYuXLjQXXjhhS4qKsqdffbZ7oEHHnAHDhwo9/zAmYyP/wYQlp555hnFx8friSeeUEpKiqZMmVJiTfPmzdWxY0ctXbr09A8InMF4WQRAhcvOzlZRUdFvrqlRo4bq1KlT5sfMyMiQ1+vVtddeq3Hjxp3ihAAsceQCQIVr3ry5du/e/Ztr+vTpU+KzMwCEJ45cAKhwc+fO1eHDh39zzfG3vgIIfxy5AAAApvicCwAAYIq4AAAApogLAABgirgAAACmiAsAAGCKuAAAAKaICwAAYIq4AAAApv4fPN9jHuxuS6kAAAAASUVORK5CYII="
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 用户操作类型分布\n",
    "tags = data_user_log.action_type.value_counts().sort_index()\n",
    "tags.plot.bar()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:27.465808400Z",
     "start_time": "2024-09-24T14:04:27.174710Z"
    }
   },
   "id": "9a4af2fd4572e766",
   "execution_count": 16
  },
  {
   "cell_type": "markdown",
   "source": [
    "操作类型中0表示单击，1表示添加到购物车，2表示购买，3表示添加到收藏夹。\n",
    "大部分用户都只是进行点击操作，添加购物车的比较少，多为直接购买或添加收藏夹。"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "1627c381b3a6fe7f"
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 构建特征"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "c66db06426af2063"
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 用户特征\n",
    "# 按user_id分组\n",
    "groups = data_user_log.groupby([\"user_id\"])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:55.540054Z",
     "start_time": "2024-09-24T14:04:55.531051700Z"
    }
   },
   "id": "2a33c6cdc1a9d6cf",
   "execution_count": 19
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "Buffer dtype mismatch, expected 'const int64_t' but got 'int'",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mValueError\u001B[0m                                Traceback (most recent call last)",
      "Cell \u001B[1;32mIn[20], line 3\u001B[0m\n\u001B[0;32m      1\u001B[0m \u001B[38;5;66;03m# 统计交互总次数\u001B[39;00m\n\u001B[0;32m      2\u001B[0m temp \u001B[38;5;241m=\u001B[39m groups\u001B[38;5;241m.\u001B[39msize()\u001B[38;5;241m.\u001B[39mreset_index()\u001B[38;5;241m.\u001B[39mrename(columns \u001B[38;5;241m=\u001B[39m {\u001B[38;5;241m0\u001B[39m:\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mu1\u001B[39m\u001B[38;5;124m\"\u001B[39m})\n\u001B[1;32m----> 3\u001B[0m data \u001B[38;5;241m=\u001B[39m pd\u001B[38;5;241m.\u001B[39mmerge(data,temp, on \u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124muser_id\u001B[39m\u001B[38;5;124m\"\u001B[39m,how \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mleft\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m      4\u001B[0m data\u001B[38;5;241m.\u001B[39mhead(\u001B[38;5;241m3\u001B[39m)\n",
      "File \u001B[1;32mD:\\anaconda\\Lib\\site-packages\\pandas\\core\\reshape\\merge.py:184\u001B[0m, in \u001B[0;36mmerge\u001B[1;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001B[0m\n\u001B[0;32m    169\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m    170\u001B[0m     op \u001B[38;5;241m=\u001B[39m _MergeOperation(\n\u001B[0;32m    171\u001B[0m         left_df,\n\u001B[0;32m    172\u001B[0m         right_df,\n\u001B[1;32m   (...)\u001B[0m\n\u001B[0;32m    182\u001B[0m         validate\u001B[38;5;241m=\u001B[39mvalidate,\n\u001B[0;32m    183\u001B[0m     )\n\u001B[1;32m--> 184\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m op\u001B[38;5;241m.\u001B[39mget_result(copy\u001B[38;5;241m=\u001B[39mcopy)\n",
      "File \u001B[1;32mD:\\anaconda\\Lib\\site-packages\\pandas\\core\\reshape\\merge.py:886\u001B[0m, in \u001B[0;36m_MergeOperation.get_result\u001B[1;34m(self, copy)\u001B[0m\n\u001B[0;32m    883\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mindicator:\n\u001B[0;32m    884\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mleft, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mright \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_indicator_pre_merge(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mleft, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mright)\n\u001B[1;32m--> 886\u001B[0m join_index, left_indexer, right_indexer \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_get_join_info()\n\u001B[0;32m    888\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_reindex_and_concat(\n\u001B[0;32m    889\u001B[0m     join_index, left_indexer, right_indexer, copy\u001B[38;5;241m=\u001B[39mcopy\n\u001B[0;32m    890\u001B[0m )\n\u001B[0;32m    891\u001B[0m result \u001B[38;5;241m=\u001B[39m result\u001B[38;5;241m.\u001B[39m__finalize__(\u001B[38;5;28mself\u001B[39m, method\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_merge_type)\n",
      "File \u001B[1;32mD:\\anaconda\\Lib\\site-packages\\pandas\\core\\reshape\\merge.py:1151\u001B[0m, in \u001B[0;36m_MergeOperation._get_join_info\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m   1147\u001B[0m     join_index, right_indexer, left_indexer \u001B[38;5;241m=\u001B[39m _left_join_on_index(\n\u001B[0;32m   1148\u001B[0m         right_ax, left_ax, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mright_join_keys, sort\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39msort\n\u001B[0;32m   1149\u001B[0m     )\n\u001B[0;32m   1150\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1151\u001B[0m     (left_indexer, right_indexer) \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_get_join_indexers()\n\u001B[0;32m   1153\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mright_index:\n\u001B[0;32m   1154\u001B[0m         \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mleft) \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m0\u001B[39m:\n",
      "File \u001B[1;32mD:\\anaconda\\Lib\\site-packages\\pandas\\core\\reshape\\merge.py:1125\u001B[0m, in \u001B[0;36m_MergeOperation._get_join_indexers\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m   1123\u001B[0m \u001B[38;5;66;03m# make mypy happy\u001B[39;00m\n\u001B[0;32m   1124\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mhow \u001B[38;5;241m!=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124masof\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m-> 1125\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m get_join_indexers(\n\u001B[0;32m   1126\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mleft_join_keys, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mright_join_keys, sort\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39msort, how\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mhow\n\u001B[0;32m   1127\u001B[0m )\n",
      "File \u001B[1;32mD:\\anaconda\\Lib\\site-packages\\pandas\\core\\reshape\\merge.py:1759\u001B[0m, in \u001B[0;36mget_join_indexers\u001B[1;34m(left_keys, right_keys, sort, how)\u001B[0m\n\u001B[0;32m   1757\u001B[0m     _, lidx, ridx \u001B[38;5;241m=\u001B[39m left\u001B[38;5;241m.\u001B[39mjoin(right, how\u001B[38;5;241m=\u001B[39mhow, return_indexers\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m, sort\u001B[38;5;241m=\u001B[39msort)\n\u001B[0;32m   1758\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1759\u001B[0m     lidx, ridx \u001B[38;5;241m=\u001B[39m get_join_indexers_non_unique(\n\u001B[0;32m   1760\u001B[0m         left\u001B[38;5;241m.\u001B[39m_values, right\u001B[38;5;241m.\u001B[39m_values, sort, how\n\u001B[0;32m   1761\u001B[0m     )\n\u001B[0;32m   1763\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m lidx \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;129;01mand\u001B[39;00m is_range_indexer(lidx, \u001B[38;5;28mlen\u001B[39m(left)):\n\u001B[0;32m   1764\u001B[0m     lidx \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n",
      "File \u001B[1;32mD:\\anaconda\\Lib\\site-packages\\pandas\\core\\reshape\\merge.py:1793\u001B[0m, in \u001B[0;36mget_join_indexers_non_unique\u001B[1;34m(left, right, sort, how)\u001B[0m\n\u001B[0;32m   1770\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mget_join_indexers_non_unique\u001B[39m(\n\u001B[0;32m   1771\u001B[0m     left: ArrayLike,\n\u001B[0;32m   1772\u001B[0m     right: ArrayLike,\n\u001B[0;32m   1773\u001B[0m     sort: \u001B[38;5;28mbool\u001B[39m \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m,\n\u001B[0;32m   1774\u001B[0m     how: JoinHow \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124minner\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[0;32m   1775\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m \u001B[38;5;28mtuple\u001B[39m[npt\u001B[38;5;241m.\u001B[39mNDArray[np\u001B[38;5;241m.\u001B[39mintp], npt\u001B[38;5;241m.\u001B[39mNDArray[np\u001B[38;5;241m.\u001B[39mintp]]:\n\u001B[0;32m   1776\u001B[0m \u001B[38;5;250m    \u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[0;32m   1777\u001B[0m \u001B[38;5;124;03m    Get join indexers for left and right.\u001B[39;00m\n\u001B[0;32m   1778\u001B[0m \n\u001B[1;32m   (...)\u001B[0m\n\u001B[0;32m   1791\u001B[0m \u001B[38;5;124;03m        Indexer into right.\u001B[39;00m\n\u001B[0;32m   1792\u001B[0m \u001B[38;5;124;03m    \"\"\"\u001B[39;00m\n\u001B[1;32m-> 1793\u001B[0m     lkey, rkey, count \u001B[38;5;241m=\u001B[39m _factorize_keys(left, right, sort\u001B[38;5;241m=\u001B[39msort)\n\u001B[0;32m   1794\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m how \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mleft\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n\u001B[0;32m   1795\u001B[0m         lidx, ridx \u001B[38;5;241m=\u001B[39m libjoin\u001B[38;5;241m.\u001B[39mleft_outer_join(lkey, rkey, count, sort\u001B[38;5;241m=\u001B[39msort)\n",
      "File \u001B[1;32mD:\\anaconda\\Lib\\site-packages\\pandas\\core\\reshape\\merge.py:2561\u001B[0m, in \u001B[0;36m_factorize_keys\u001B[1;34m(lk, rk, sort)\u001B[0m\n\u001B[0;32m   2554\u001B[0m     rlab \u001B[38;5;241m=\u001B[39m rizer\u001B[38;5;241m.\u001B[39mfactorize(\n\u001B[0;32m   2555\u001B[0m         rk\u001B[38;5;241m.\u001B[39mto_numpy(na_value\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m1\u001B[39m, dtype\u001B[38;5;241m=\u001B[39mlk\u001B[38;5;241m.\u001B[39mdtype\u001B[38;5;241m.\u001B[39mnumpy_dtype), mask\u001B[38;5;241m=\u001B[39mrk\u001B[38;5;241m.\u001B[39misna()\n\u001B[0;32m   2556\u001B[0m     )\n\u001B[0;32m   2557\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m   2558\u001B[0m     \u001B[38;5;66;03m# Argument 1 to \"factorize\" of \"ObjectFactorizer\" has incompatible type\u001B[39;00m\n\u001B[0;32m   2559\u001B[0m     \u001B[38;5;66;03m# \"Union[ndarray[Any, dtype[signedinteger[_64Bit]]],\u001B[39;00m\n\u001B[0;32m   2560\u001B[0m     \u001B[38;5;66;03m# ndarray[Any, dtype[object_]]]\"; expected \"ndarray[Any, dtype[object_]]\"\u001B[39;00m\n\u001B[1;32m-> 2561\u001B[0m     llab \u001B[38;5;241m=\u001B[39m rizer\u001B[38;5;241m.\u001B[39mfactorize(lk)  \u001B[38;5;66;03m# type: ignore[arg-type]\u001B[39;00m\n\u001B[0;32m   2562\u001B[0m     rlab \u001B[38;5;241m=\u001B[39m rizer\u001B[38;5;241m.\u001B[39mfactorize(rk)  \u001B[38;5;66;03m# type: ignore[arg-type]\u001B[39;00m\n\u001B[0;32m   2563\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m llab\u001B[38;5;241m.\u001B[39mdtype \u001B[38;5;241m==\u001B[39m np\u001B[38;5;241m.\u001B[39mdtype(np\u001B[38;5;241m.\u001B[39mintp), llab\u001B[38;5;241m.\u001B[39mdtype\n",
      "File \u001B[1;32mpandas\\\\_libs\\\\hashtable_class_helper.pxi:3045\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.Int64Factorizer.factorize\u001B[1;34m()\u001B[0m\n",
      "\u001B[1;31mValueError\u001B[0m: Buffer dtype mismatch, expected 'const int64_t' but got 'int'"
     ]
    }
   ],
   "source": [
    "# 统计交互总次数\n",
    "temp = groups.size().reset_index().rename(columns = {0:\"u1\"})\n",
    "data = pd.merge(data,temp, on =\"user_id\",how = \"left\")\n",
    "data.head(3)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:59.545628500Z",
     "start_time": "2024-09-24T14:04:59.247588600Z"
    }
   },
   "id": "5ce1bd027e2a0199",
   "execution_count": 20
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 统计交互天数\n",
    "temp = groups.time_stamp.nunique().reset_index().rename(columns = {\"time_stamp\":\"u2\"})\n",
    "data = data.merge(temp,on =\"user_id\",how = \"left\")\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.033340200Z"
    }
   },
   "id": "79c48f8e3fcfe087",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 统计交互过的商品、品类、品牌、商家数\n",
    "temp = groups[['item_id','cat_id','merchant_id','brand_id']].nunique().reset_index().rename(columns={\n",
    "    'item_id':'u3','cat_id':'u4','merchant_id':'u5','brand_id':'u6'})\n",
    "data = data.merge(temp,on =\"user_id\",how = \"left\")\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-09-24T14:04:29.094337600Z",
     "start_time": "2024-09-24T14:04:29.034335700Z"
    }
   },
   "id": "f712f9bf438b3039",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 统计点击、加购物车、购买、收藏的操作次数\n",
    "temp = groups['action_type'].value_counts().unstack().reset_index().rename(columns={0:'u7', 1:'u8', 2:'u9', 3:'u10'})\n",
    "data = data.merge(temp,on =\"user_id\",how = \"left\")\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.035337800Z"
    }
   },
   "id": "d9c9fc71ddf34eaf",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 统计购买点击比\n",
    "data[\"u11\"] = data[\"u9\"]/data[\"u7\"]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.037337100Z"
    }
   },
   "id": "5986c3b5f16d7729",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 复购率 = 复购过的商家数/购买过的总商家数\n",
    "# 按user_id,merchant_id分组，购买天数>1则复购标记为1，反之为0\n",
    "groups_rb = data_user_log[data_user_log[\"action_type\"]==2].groupby([\"user_id\",\"merchant_id\"])\n",
    "temp_rb = groups_rb.time_stamp.nunique().reset_index().rename(columns = {\"time_stamp\":\"n_days\"})\n",
    "temp_rb[\"label_um\"] = [(1 if x > 1 else 0) for x in temp_rb[\"n_days\"]]\n",
    "\n",
    "# 与data进行匹配\n",
    "temp = temp_rb.groupby([\"user_id\",\"label_um\"]).size().unstack(fill_value=0).reset_index()\n",
    "temp[\"u12\"] = temp[1]/(temp[0]+temp[1])\n",
    "\n",
    "data = data.merge(temp[[\"user_id\",\"u12\"]],on =\"user_id\",how = \"left\")\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.038337300Z"
    }
   },
   "id": "c728d7a22ab61656",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 性别、年龄独热编码处理\n",
    "data = data.merge(data_user_info,on =\"user_id\",how = \"left\")\n",
    "\n",
    "temp = pd.get_dummies(data[\"age_range\"],prefix = \"age\")\n",
    "temp2 = pd.get_dummies(data[\"gender\"],prefix = \"gender\")\n",
    "\n",
    "data = pd.concat([data,temp,temp2],axis = 1)\n",
    "data.drop(columns = [\"age_range\",\"gender\"],inplace = True)\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.039336900Z"
    }
   },
   "id": "55c9ad824821f4e8",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 商家特征\n",
    "# 按merchant_id分组\n",
    "groups = data_user_log.groupby([\"merchant_id\"])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.041336700Z"
    }
   },
   "id": "76d9d3967466558b",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 统计交互总次数\n",
    "temp = groups.size().reset_index().rename(columns = {0:\"m1\"})\n",
    "data = pd.merge(data,temp, on =\"merchant_id\",how = \"left\")\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.042336600Z"
    }
   },
   "id": "4daa03797b2c1ad5",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 统计交互天数\n",
    "temp = groups.time_stamp.nunique().reset_index().rename(columns = {\"time_stamp\":\"m2\"})\n",
    "data = data.merge(temp,on =\"merchant_id\",how = \"left\")\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.043337400Z"
    }
   },
   "id": "6603d81aec9ae530",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 统计交互过的商品、品类、品牌、用户数\n",
    "temp = groups[['item_id','cat_id','user_id','brand_id']].nunique().reset_index().rename(columns={\n",
    "    'item_id':'m3','cat_id':'m4','user_id':'m5','brand_id':'m6'})\n",
    "data = data.merge(temp,on =\"merchant_id\",how = \"left\")\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.044336800Z"
    }
   },
   "id": "7cd4364ae44eef54",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 统计点击、加购物车、购买、收藏的操作次数\n",
    "temp = groups['action_type'].value_counts().unstack().reset_index().rename(columns={0:'m7', 1:'m8', 2:'m9', 3:'m10'})\n",
    "data = data.merge(temp,on =\"merchant_id\",how = \"left\")\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.045338800Z"
    }
   },
   "id": "d05a079d0ca86a67",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 统计购买点击比\n",
    "data[\"m11\"] = data[\"m9\"]/data[\"m7\"]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.047338500Z"
    }
   },
   "id": "9c1fced39894b351",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 复购率 = 复购过的用户数/购买过的总用户数\n",
    "# 按user_id,merchant_id分组，购买天数>1则复购标记为1，反之为0（在上一步已计算）\n",
    "# 与data进行匹配\n",
    "temp = temp_rb.groupby([\"merchant_id\",\"label_um\"]).size().unstack(fill_value=0).reset_index()\n",
    "temp[\"m12\"] = temp[1]/(temp[0]+temp[1])\n",
    "\n",
    "data = data.merge(temp[[\"merchant_id\",\"m12\"]],on =\"merchant_id\",how = \"left\")\n",
    "data.head(3)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.048337800Z"
    }
   },
   "id": "237144a5816feb85",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# User-merchant features:\n",
    "# regroup the behaviour log by (user_id, merchant_id) pair\n",
    "groups = data_user_log.groupby(by=['user_id', 'merchant_id'])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.049338Z"
    }
   },
   "id": "95d17b3a56c6f1f8",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# um1: total number of log rows (interactions) per group\n",
    "temp = groups.size().reset_index(name=\"um1\")\n",
    "data = data.merge(temp, on=[\"merchant_id\", \"user_id\"], how=\"left\")\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.050337500Z"
    }
   },
   "id": "1ea0aa69354059dd",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# um2: number of distinct time_stamp values (interaction days) per group\n",
    "temp = groups[\"time_stamp\"].nunique().rename(\"um2\").reset_index()\n",
    "data = pd.merge(data, temp, on=[\"merchant_id\", \"user_id\"], how=\"left\")\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.051336900Z"
    }
   },
   "id": "433121f03e74e270",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# um3 / um4 / um5: distinct items, categories and brands seen per group\n",
    "temp = (\n",
    "    groups[['item_id', 'cat_id', 'brand_id']]\n",
    "    .nunique()\n",
    "    .rename(columns={'item_id': 'um3', 'cat_id': 'um4', 'brand_id': 'um5'})\n",
    "    .reset_index()\n",
    ")\n",
    "data = pd.merge(data, temp, on=[\"merchant_id\", \"user_id\"], how=\"left\")\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.053337Z"
    }
   },
   "id": "d47913bf8101a3a2",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Count how often each action type occurs within every group.\n",
    "# Codes 0-3 become um6-um9; the original note lists them as click, add-to-cart,\n",
    "# purchase and favourite (presumably in code order - TODO confirm).\n",
    "temp = (\n",
    "    groups['action_type']\n",
    "    .value_counts()\n",
    "    .unstack()\n",
    "    .reset_index()\n",
    "    .rename(columns={0: 'um6', 1: 'um7', 2: 'um8', 3: 'um9'})\n",
    ")\n",
    "data = pd.merge(left=data, right=temp, on=[\"merchant_id\", \"user_id\"], how=\"left\")\n",
    "data.head(5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.054336800Z"
    }
   },
   "id": "c5978632e4308c60",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Purchase-to-click ratio: um8 divided element-wise by um6\n",
    "data[\"um10\"] = data[\"um8\"].div(data[\"um6\"])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.055337300Z"
    }
   },
   "id": "26ad96294070918c",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Persist the engineered features (without the row index) so that a later\n",
    "# session can reload them instead of recomputing\n",
    "data.to_csv(path_or_buf=\"features.csv\", index=False)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.057338100Z"
    }
   },
   "id": "71707947d83aeb61",
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 模型构建"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "1893f429a3e3462c"
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Reload the feature table written by the feature-engineering step\n",
    "data = pd.read_csv(filepath_or_buffer=\"features.csv\")\n",
    "data.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.058338800Z"
    }
   },
   "id": "fdccb1807b09450b",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Shrink memory use: downcast float and integer columns to the smallest\n",
    "# numeric dtype pandas finds sufficient for their values\n",
    "fcols = data.select_dtypes(include='float').columns\n",
    "icols = data.select_dtypes(include='integer').columns\n",
    "data[fcols] = data[fcols].apply(lambda col: pd.to_numeric(col, downcast='float'))\n",
    "data[icols] = data[icols].apply(lambda col: pd.to_numeric(col, downcast='integer'))\n",
    "\n",
    "data.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.059338900Z"
    }
   },
   "id": "a023489a82f61211",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Number of missing values in each column\n",
    "data.isna().sum()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.060338800Z"
    }
   },
   "id": "1460672320190ced",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Several columns are empty where the left joins found no match;\n",
    "# replace every missing value with 0\n",
    "data = data.fillna(0)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.060338800Z"
    }
   },
   "id": "fc4ec2e0734542d3",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Separate the rows again by their `origin` marker; the test part also\n",
    "# drops the `label` column\n",
    "train = data.loc[data[\"origin\"] == \"train\"].drop(columns=[\"origin\"])\n",
    "test = data.loc[data[\"origin\"] == \"test\"].drop(columns=[\"origin\", \"label\"])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.061337800Z"
    }
   },
   "id": "5a8d364a97100562",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Target vector and feature matrix taken from the labelled rows\n",
    "y = train['label']\n",
    "X = train.drop(columns=['label'])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.062338300Z"
    }
   },
   "id": "e318a95069d14222",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Hold out 20% of the labelled rows as a validation set.\n",
    "# random_state makes the split repeatable across kernel restarts, and\n",
    "# stratify=y keeps the positive-label ratio the same in both parts.\n",
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, random_state=42, stratify=y\n",
    ")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.063337400Z"
    }
   },
   "id": "6c08bb7667b9f936",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Share of positive labels in the training part and in the held-out part\n",
    "# (the held-out part is named *_test here but serves as the validation set)\n",
    "for message, labels in (\n",
    "    (\"Ratio of positive samples in train dataset:\", y_train),\n",
    "    (\"Ratio of positive samples in test dataset:\", y_test),\n",
    "):\n",
    "    print(message, labels.mean())\n",
    "# Original author's observation: the two ratios are essentially the same."
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.064336900Z"
    }
   },
   "id": "f4d5cd4dca52ee2b",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "from lightgbm import LGBMClassifier\n",
    "# Baseline model: library defaults except for max_depth, which is set to 20\n",
    "model = LGBMClassifier(max_depth=20)\n",
    "model.fit(X=X_train, y=y_train)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.065336700Z"
    }
   },
   "id": "c7db41581e901010",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "from sklearn import metrics as ms\n",
    "\n",
    "# Second column of predict_proba, used as the score for ROC AUC on the\n",
    "# held-out rows\n",
    "prob = model.predict_proba(X_test)[:, 1]\n",
    "auc = ms.roc_auc_score(y_true=y_test, y_score=prob)\n",
    "auc"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.066337200Z"
    }
   },
   "id": "9df2d501a4937249",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "# Hyper-parameter grid searched with 3-fold cross-validation, scored by ROC AUC.\n",
    "# LightGBM only applies `subsample` when `subsample_freq` is non-zero (it\n",
    "# defaults to 0, i.e. bagging disabled), so the frequency is set explicitly to\n",
    "# make the subsample value take effect.\n",
    "# NOTE(review): the max_depth candidates may behave alike unless num_leaves is\n",
    "# raised as well - confirm against the LightGBM parameter docs.\n",
    "params = {\n",
    "    \"learning_rate\": [0.05, 0.1],\n",
    "    \"max_depth\": [10, 50, 100],\n",
    "    \"subsample\": [0.5],\n",
    "    \"subsample_freq\": [1],\n",
    "    \"min_split_gain\": [0.05],\n",
    "}\n",
    "\n",
    "grid_search = GridSearchCV(model, params, cv=3, scoring=\"roc_auc\")\n",
    "grid_search.fit(X_train, y_train)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.067337300Z"
    }
   },
   "id": "aca8ed93838d90f9",
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Outcome of the search: best parameter set, its cross-validated score,\n",
    "# and the estimator refitted with those parameters\n",
    "display(\n",
    "    grid_search.best_params_,\n",
    "    grid_search.best_score_,\n",
    "    grid_search.best_estimator_,\n",
    ")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2024-09-24T14:04:29.068337500Z"
    }
   },
   "id": "2d5324ceb6e61abf",
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
